00001 /* 00002 * OpenBIOS - free your system! 00003 * ( FCode tokenizer ) 00004 * 00005 * scanner.c - simple scanner for forth files. 00006 * 00007 * This program is part of a free implementation of the IEEE 1275-1994 00008 * Standard for Boot (Initialization Configuration) Firmware. 00009 * 00010 * Copyright (C) 2001-2005 by Stefan Reinauer <stepan@openbios.org> 00011 * 00012 * This program is free software; you can redistribute it and/or modify 00013 * it under the terms of the GNU General Public License as published by 00014 * the Free Software Foundation; version 2 of the License. 00015 * 00016 * This program is distributed in the hope that it will be useful, 00017 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00019 * GNU General Public License for more details. 00020 * 00021 * You should have received a copy of the GNU General Public License 00022 * along with this program; if not, write to the Free Software 00023 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA, 02110-1301 USA 00024 * 00025 */ 00026 00027 /* ************************************************************************** 00028 * Modifications made in 2005 by IBM Corporation 00029 * (C) Copyright 2005 IBM Corporation. All Rights Reserved. 00030 * Modifications Author: David L. Paktor dlpaktor@us.ibm.com 00031 **************************************************************************** */ 00032 00033 #include <stdio.h> 00034 #include <stdlib.h> 00035 #include <unistd.h> 00036 #ifdef __GLIBC__ 00037 #define __USE_XOPEN_EXTENDED 00038 #endif 00039 #include <string.h> 00040 #include <time.h> 00041 #include <ctype.h> 00042 00043 #include "macros.h" 00044 #include "stack.h" 00045 #include "stream.h" 00046 #include "emit.h" 00047 #include "toke.h" 00048 #include "dictionary.h" 00049 #include "vocabfuncts.h" 00050 #include "scanner.h" 00051 #include "errhandler.h" 00052 #include "tokzesc.h" 00053 #include "conditl.h" 00054 #include "flowcontrol.h" 00055 #include "usersymbols.h" 00056 #include "clflags.h" 00057 #include "devnode.h" 00058 #include "tracesyms.h" 00059 #include "nextfcode.h" 00060 00061 #include "parselocals.h" 00062 00063 /* ************************************************************************** 00064 * 00065 * Some VERY IMPORTANT global variables follow 00066 * 00067 **************************************************************************** */ 00068 00069 u8 *statbuf=NULL; /* The word just read from the input stream */ 00070 u8 base=0x0a; /* The numeric-interpretation base */ 00071 00072 /* pci data */ 00073 bool pci_is_last_image=TRUE; 00074 u16 pci_image_rev=0x0001; /* Vendor's Image, NOT PCI Data Structure Rev */ 00075 u16 pci_vpd=0x0000; 00076 00077 00078 /* Having to do with the state of the tokenization */ 00079 bool offs16 = TRUE; /* We are using 16-bit branch- (etc) -offsets */ 00080 bool in_tokz_esc = FALSE; /* TRUE if in "Tokenizer Escape" mode */ 00081 bool incolon = FALSE; /* TRUE if inside a colon definition */ 00082 bool haveend = FALSE; /* TRUE if the "end" code was read. */ 00083 int do_loop_depth = 0; /* How deep we are inside DO ... LOOP variants */ 00084 00085 /* Used for error-checking of IBM-style Locals */ 00086 int lastcolon; /* Location in output stream of latest colon-definition. */ 00087 00088 /* Used for error reporting */ 00089 char *last_colon_defname = NULL; /* Name of last colon-definition */ 00090 char *last_colon_filename = NULL; /* File where last colon-def'n made */ 00091 unsigned int last_colon_lineno; /* Line number of last colon-def'n */ 00092 bool report_multiline = TRUE; /* False to suspend multiline warning */ 00093 unsigned int last_colon_abs_token_no; 00094 00095 /* ************************************************************************** 00096 * Local variables 00097 **************************************************************************** */ 00098 static u16 last_colon_fcode; /* FCode-number assigned to last colon-def'n */ 00099 /* Used for RECURSE */ 00100 00101 static bool do_not_overload = TRUE ; /* False to suspend dup-name-test */ 00102 static bool got_until_eof = FALSE ; /* TRUE to signal "unterminated" */ 00103 00104 static unsigned int last_colon_do_depth = 0; 00105 00106 /* State of headered-ness for name-creation */ 00107 typedef enum headeredness_t { 00108 FLAG_HEADERLESS , 00109 FLAG_EXTERNAL , 00110 FLAG_HEADERS } headeredness ; 00111 static headeredness hdr_flag = FLAG_HEADERLESS ; /* Init'l default state */ 00112 00113 /* Local variables having to do with: */ 00114 /* ... the state of the tokenization */ 00115 static bool is_instance = FALSE; /* Is "instance" is in effect? */ 00116 static char *instance_filename = NULL; /* File where "instance" invoked */ 00117 static unsigned int instance_lineno; /* Line number of "instance" */ 00118 static bool fcode_started = FALSE ; /* Only 1 fcode_starter per block. */ 00119 static bool first_fc_starter = TRUE; /* Only once per tokenization... */ 00120 00121 /* ... with the state of the input stream, */ 00122 static bool need_to_pop_source; 00123 00124 /* ... with the use of the return stack, */ 00125 static int ret_stk_depth = 0; /* Return-Stack-Usage-Depth counter */ 00126 00127 /* ... and with control of error-messaging. */ 00128 /* Should a warning about a dangling "instance" 00129 * be issued at the next device-node change? 00130 */ 00131 static bool dev_change_instance_warning = TRUE; 00132 00133 /* Has a gap developed between "instance" and its application? */ 00134 static bool instance_definer_gap = FALSE; 00135 00136 /* Shared phrases */ 00137 static char *in_tkz_esc_mode = "in Tokenizer-Escape mode.\n"; 00138 00139 00140 /* ************************************************************************** 00141 * 00142 * Function name: skip_ws 00143 * Synopsis: Advance the PC past all whitespace. 00144 * Protect against pointer over-runs 00145 * 00146 * Inputs: 00147 * Parameters: NONE 00148 * Global Variables: 00149 * pc Input-source Scanning pointer 00150 * end End of input-source buffer 00151 * 00152 * Outputs: 00153 * Returned Value: TRUE if PC reached END before non-blank char 00154 * Global Variables: 00155 * pc Advanced to first non-blank char, or to END 00156 * lineno Incremented if encountered new-line along the way 00157 * 00158 * Error Detection: 00159 * Return a TRUE if End of input-source buffer reached before 00160 * non-blank character. Not necessarily an error; allow 00161 * calling routine to decide... 00162 * 00163 **************************************************************************** */ 00164 00165 static bool skip_ws(void) 00166 { 00167 bool retval = TRUE; 00168 char ch_tmp; 00169 00170 for ( ; pc < end; pc++ ) 00171 { 00172 ch_tmp = *pc; 00173 if ( (ch_tmp != '\t') && (ch_tmp != ' ') && (ch_tmp != '\n' ) ) 00174 { 00175 retval = FALSE; 00176 break; 00177 } 00178 if ( ch_tmp == '\n') lineno++; 00179 } 00180 return ( retval ); 00181 } 00182 00183 /* ************************************************************************** 00184 * 00185 * Function name: skip_until 00186 * Synopsis: Advance the PC to the given character. 00187 * Do not copy anything into statbuf. 00188 * Protect against pointer over-runs 00189 * 00190 * Inputs: 00191 * Parameters: 00192 * lim_ch Limiting Character 00193 * Global Variables: 00194 * pc Input-source Scanning pointer 00195 * end End of input-source buffer 00196 * 00197 * Outputs: 00198 * Returned Value: TRUE if PC reached END before finding LIM_CH 00199 * Global Variables: 00200 * pc Advanced to first occurrence of LIM_CH, or to END 00201 * lineno Incremented if encountered new-line along the way 00202 * 00203 * Error Detection: 00204 * Return a TRUE if End of input-source buffer reached before 00205 * desired character. Not necessarily an error; allow calling 00206 * routine to decide... 00207 * 00208 **************************************************************************** */ 00209 00210 bool skip_until( char lim_ch) 00211 { 00212 bool retval = TRUE; 00213 char ch_tmp; 00214 00215 for ( ; pc < end; pc++ ) 00216 { 00217 ch_tmp = *pc; 00218 if ( ch_tmp == lim_ch ) 00219 { 00220 retval = FALSE; 00221 break; 00222 } 00223 if ( ch_tmp == '\n') lineno++; 00224 } 00225 return ( retval ); 00226 } 00227 00228 00229 /* ************************************************************************** 00230 * 00231 * Function name: get_until 00232 * Synopsis: Return, in statbuf, the string from PC to the first 00233 * occurrence of the given delimiter-character.. 00234 * 00235 * Inputs: 00236 * Parameters: 00237 * needle The given delimiter-character 00238 * Global Variables: 00239 * pc Input-source Scanning Pointer 00240 * 00241 * Outputs: 00242 * Returned Value: Length of the string obtained 00243 * Global Variables: 00244 * statbuf The string obtained from the input stream; 00245 * does not include the delimiter-character. 00246 * pc Bumped past the delimiter-character, unless 00247 * it's a new-line, in which case leave it 00248 * to be handled by get_word() 00249 * Local Static Variables: 00250 * got_until_eof Pass this as a signal that the end of the 00251 * buffer was reached before the delimiter; 00252 * Testing whether PC has reached END is 00253 * not a sufficient indication. 00254 * 00255 * Error Detection: 00256 * If string overflows statbuf allocation, ERROR, and 00257 * return "no string" (i.e., length = 0). 00258 * Otherwise, if delimiter not found before eof, keep string. 00259 * Protection against PC pointer-over-run past END is 00260 * provided by skip_until() . Reaching END will be 00261 * handled by calling routine; pass indication along 00262 * via Local Static Variable. 00263 * 00264 * Process Explanation: 00265 * Skip the delimiter-character from further input, unless it's a 00266 * new-line which will be skipped anyway. Let skip_until() 00267 * and get_word() handle incrementing line-number counters. 00268 * If skip_until() indicated reaching end-of-file, don't bump PC 00269 * 00270 * Revision History: 00271 * Updated Thu, 14 Jul 2005 by David L. Paktor 00272 * More robust testing for when PC exceeds END 00273 * Involved replacing firstchar() 00274 * 00275 **************************************************************************** */ 00276 00277 static signed long get_until(char needle) 00278 { 00279 u8 *safe; 00280 unsigned long len = 0; 00281 00282 safe=pc; 00283 00284 got_until_eof = skip_until(needle); 00285 00286 len = pc - safe; 00287 if (len >= GET_BUF_MAX ) 00288 { 00289 tokenization_error( TKERROR, 00290 "get_until buffer overflow. Max is %d.\n", GET_BUF_MAX-1 ); 00291 len = GET_BUF_MAX-1; 00292 } 00293 00294 memcpy(statbuf, safe, len); 00295 statbuf[len]=0; 00296 00297 if ( INVERSE(got_until_eof) ) 00298 { 00299 if ( needle != '\n' ) pc++; 00300 } 00301 return len; 00302 } 00303 00304 00305 /* ************************************************************************** 00306 * 00307 * We are going to use a fairly sophisticated mechanism to 00308 * make a smooth transition between processing the body 00309 * of a Macro, a User-defined Symbol or an FLOADed file 00310 * and the resumption of processing the source file, so 00311 * that the end-of-file will only be seen at the end of 00312 * the primary input file (the one from the command-line). 00313 * This mechanism will be tied in with the get_word() routine 00314 * 00315 * We are going to define a private data-structure in which 00316 * we will save the state of the current source file, 00317 * and from which, of course, we will recover it. Its 00318 * fields will be: 00319 * A pointer to the next structure in the list. 00320 * The saved values of START END and PC 00321 * The saved values of INAME and LINENO 00322 * A flag indicating that get-word should "pause" 00323 * before popping the source-stream because 00324 * the input file will be changing. 00325 * A place from which to save and recover the state of 00326 * whether we're testing for "Multi-line" strings; 00327 * to prevent undeserved "Multi-line" warnings 00328 * during Macro processing. 00329 * A pointer to a "resumption" routine, to call 00330 * when resuming processing the source file; 00331 * the routine takes a pointer parameter 00332 * and has no return value. The pointer 00333 * may be NULL if no routine is needed. 00334 * The pointer to pass as the parameter to the 00335 * resumption routine. 00336 * 00337 **************************************************************************** */ 00338 00339 typedef struct source_state 00340 { 00341 struct source_state *next; 00342 u8 *old_start; 00343 u8 *old_pc; 00344 u8 *old_end; 00345 char *old_iname; 00346 unsigned int old_lineno; 00347 bool pause_before_pop; 00348 bool sav_rep_multlin; 00349 void (*resump_func)(); 00350 _PTR resump_param; 00351 } source_state_t ; 00352 00353 static source_state_t *saved_source = NULL; 00354 00355 00356 /* ************************************************************************** 00357 * 00358 * Function name: push_source 00359 * Synopsis: Save the state of the current source file, in the 00360 * source_state data-structure LIFO linked-list. 00361 * 00362 * Inputs: 00363 * Parameters: 00364 * res_func Pointer to routine to call when resuming 00365 * processing the saved source file. 00366 * res_param Parameter to pass to res_func. 00367 * Either or both pointers may be NULL. 00368 * file_chg TRUE if input file is going to change. 00369 * Global Variables: 00370 * start Points to current input buffer 00371 * end Points to end of current input buffer 00372 * pc Input point in current buffer 00373 * iname Name of current source file 00374 * lineno Line number in current source file 00375 * report_multiline Whether we're testing for "Multi-line" 00376 * Local Static Variables: 00377 * saved_source Pointer to the source_state data-structure 00378 * 00379 * Outputs: 00380 * Returned Value: NONE 00381 * Local Static Variables: 00382 * saved_source Points to new source_state entry 00383 * Memory Allocated 00384 * for the new source_state entry 00385 * When Freed? 00386 * When resuming processing the source file, by drop_source(). 00387 * 00388 * Process Explanation: 00389 * The calling routine will establish the new input buffer via 00390 * a call to init_inbuf() or the like. 00391 * 00392 **************************************************************************** */ 00393 00394 void push_source( void (*res_func)(), _PTR res_parm, bool file_chg ) 00395 { 00396 source_state_t *new_sav_src; 00397 00398 new_sav_src = safe_malloc( sizeof(source_state_t), "pushing Source state"); 00399 00400 new_sav_src->next = saved_source; 00401 new_sav_src->old_start = start; 00402 new_sav_src->old_pc = pc; 00403 new_sav_src->old_end = end; 00404 new_sav_src->old_iname = iname; 00405 new_sav_src->old_lineno = lineno; 00406 new_sav_src->pause_before_pop = file_chg; 00407 new_sav_src->sav_rep_multlin = report_multiline; 00408 new_sav_src->resump_func = res_func; 00409 new_sav_src->resump_param = res_parm; 00410 00411 saved_source = new_sav_src; 00412 } 00413 00414 /* ************************************************************************** 00415 * 00416 * Function name: drop_source 00417 * Synopsis: Remove last saved state of source processing 00418 * from the source_state LIFO linked-list, 00419 * without (or after) restoring. 00420 * 00421 * Inputs: 00422 * Parameters: NONE 00423 * Local Static Variables: 00424 * saved_source Pointer to the source_state data-structure 00425 * 00426 * Outputs: 00427 * Returned Value: NONE 00428 * Local Static Variables: 00429 * saved_source Points to previous source_state entry 00430 * Memory Freed 00431 * Saved source_state entry that was just "dropped" 00432 * 00433 * Error Detection: 00434 * None. Called only when linked-list is known not to be at end. 00435 * 00436 **************************************************************************** */ 00437 00438 static void drop_source( void) 00439 { 00440 source_state_t *former_sav_src = saved_source; 00441 00442 saved_source = saved_source->next ; 00443 free( former_sav_src); 00444 } 00445 00446 /* ************************************************************************** 00447 * 00448 * Function name: pop_source 00449 * Synopsis: Restore the state of source processing as it was 00450 * last saved in the source_state linked-list. 00451 * 00452 * Inputs: 00453 * Parameters: NONE 00454 * Local Static Variables: 00455 * saved_source Pointer to the source_state data-structure 00456 * need_to_pop_source If TRUE, don't check before popping. 00457 * 00458 * Outputs: 00459 * Returned Value: TRUE if reached end of linked-list 00460 * Global Variables: 00461 * start Points to restored input buffer 00462 * end Points to end of restored input buffer 00463 * pc Input point in restored buffer 00464 * iname Name of restored source file 00465 * lineno Line number in restored source file 00466 * report_multiline Restored to saved value. 00467 * Local Static Variables: 00468 * saved_source Points to previous source_state entry 00469 * need_to_pop_source TRUE if postponed popping till next time 00470 * Memory Freed 00471 * Saved source-state entry that was just "popped" 00472 * 00473 * Process Explanation: 00474 * First check the need_to_pop_source flag. 00475 * If it is set, we will clear it and go ahead and pop. 00476 * If it is not set, we will check the pause_before_pop field 00477 * of the top entry in the source_state linked-list. 00478 * If the pause_before_pop field is set, we will set the 00479 * need_to_pop_source flag and return. 00480 * If it is not, we will go ahead and pop. 00481 * If we are going to go ahead and pop, we will call the 00482 * "Resume-Processing" routine (if it's not NULL) before 00483 * we restore the saved source state. 00484 * 00485 **************************************************************************** */ 00486 00487 static bool pop_source( void ) 00488 { 00489 bool retval = TRUE; 00490 00491 if ( saved_source != NULL ) 00492 { 00493 retval = FALSE; 00494 if ( need_to_pop_source ) 00495 { 00496 need_to_pop_source = FALSE; 00497 }else{ 00498 if ( saved_source->pause_before_pop ) 00499 { 00500 need_to_pop_source = TRUE; 00501 return( retval); 00502 } 00503 } 00504 00505 if ( saved_source->resump_func != NULL ) 00506 { 00507 saved_source->resump_func( saved_source->resump_param); 00508 } 00509 report_multiline = saved_source->sav_rep_multlin; 00510 lineno = saved_source->old_lineno ; 00511 iname = saved_source->old_iname ; 00512 end = saved_source->old_end ; 00513 pc = saved_source->old_pc ; 00514 start = saved_source->old_start ; 00515 00516 drop_source(); 00517 } 00518 return( retval); 00519 } 00520 00521 00522 /* ************************************************************************** 00523 * 00524 * Function name: get_word 00525 * Synopsis: Gather the next "word" (aka Forth Token) from the 00526 * input stream. 00527 * A Forth Token is, of course, a string of characters 00528 * delimited by white-space (blank, tab or new-line). 00529 * Do not increment line-number counters here; leave 00530 * the delimiter after the word unconsumed. 00531 * 00532 * Inputs: 00533 * Parameters: NONE 00534 * Global Variables: 00535 * pc Input-stream Scanning Pointer 00536 * Local Static Variables: 00537 * need_to_pop_source If TRUE, pop_source() as first step 00538 * 00539 * Outputs: 00540 * Returned Value: Length of "word" gotten; 00541 * 0 if reached end of file. 00542 * -1 if reached end of primary input 00543 * (I.e., end of all source) 00544 * Global Variables: 00545 * statbuf Copy of "gotten" word 00546 * pc Advanced to end of "gotten" word, 00547 * (i.e., the next word is "consumed") 00548 * unless returning zero. 00549 * abs_token_no Incremented, if valid "word" (token) 00550 * was gotten. 00551 * 00552 * Process Explanation: 00553 * Skip whitespace to the start of the token, 00554 * then skip printable characters to the end of the token. 00555 * That part's easy, but what about when skipping whitespace 00556 * brings you to the end of the input stream? 00557 * First, look at the need_to_pop_source flag. If it's set, 00558 * we came to the end of the input stream the last time 00559 * through. Now we need to pop_source() first. 00560 * Next, we start skipping whitespace; this detects when we've 00561 * reached the end of the input stream. If we have, 00562 * then we need to pop_source() again. 00563 * If pop_source() returned a TRUE, we've reached the end 00564 * of the primary input file. Return -1. 00565 * If pop_source() turned the need_to_pop_source flag 00566 * to TRUE again, then we need to "pause" until the 00567 * next time through; return zero. 00568 * Otherwise, we proceed with collecting the token as described. 00569 * 00570 * Revision History: 00571 * Updated Thu, 23 Feb 2006 by David L. Paktor 00572 * Tied this routine in with a more sophisticated mechanism that 00573 * makes a smooth transition between processing the body of 00574 * a Macro, a User-defined Symbol or an FLOADed file, and 00575 * the resumption of processing the source file, so that the 00576 * end-of-file will only be seen at the end of the primary 00577 * input file (the one that came from the command-line) 00578 * Updated Fri, 24 Feb 2006 by David L. Paktor 00579 * This is trickier than I thought. Added a global indicator 00580 * of whether a file-boundary was crossed while getting 00581 * the word; previously, that was indicated by a return 00582 * value of zero, which now means something else... 00583 * The flag, closed_stream , will be cleared every time this 00584 * routine is entered, and set whenever close_stream() is 00585 * entered. 00586 * Updated Tue, 28 Feb 2006 at 10:13 PST by David L. Paktor 00587 * Trickier still. On crossing a file-boundary, must not 00588 * consume the first word in the resumed file, for one 00589 * call; instead, return zero. Consume it on the next 00590 * call. The closed_stream flag is now irrelevant and 00591 * has gone away. 00592 * 00593 **************************************************************************** */ 00594 00595 signed long get_word( void) 00596 { 00597 size_t len; 00598 u8 *str; 00599 bool keep_skipping; 00600 bool pop_result; 00601 00602 if ( need_to_pop_source ) 00603 { 00604 pop_result = pop_source(); 00605 } 00606 00607 do { 00608 keep_skipping = skip_ws(); 00609 if ( keep_skipping ) 00610 { 00611 pop_result = pop_source(); 00612 if ( pop_result || need_to_pop_source ) 00613 { 00614 statbuf[0] = 0; 00615 if ( pop_result ) 00616 { 00617 return -1; 00618 } 00619 return 0; 00620 } 00621 } 00622 } while ( keep_skipping ); 00623 00624 str=pc; 00625 while ( (str < end) && *str && *str!='\n' && *str!='\t' && *str!=' ') 00626 str++; 00627 00628 len=(size_t)(str-pc); 00629 if (len >= GET_BUF_MAX ) 00630 { 00631 tokenization_error ( FATAL, 00632 "get_word buffer overflow. Max is %d.", GET_BUF_MAX-1 ); 00633 } 00634 00635 memcpy(statbuf, pc, len); 00636 statbuf[len]=0; 00637 00638 #ifdef DEBUG_SCANNER 00639 printf("%s:%d: debug: read token '%s', length=%ld\n", 00640 iname, lineno, statbuf, len); 00641 #endif 00642 pc+=len; 00643 abs_token_no++; 00644 return len; 00645 } 00646 00647 00648 /* ************************************************************************** 00649 * 00650 * Function name: get_word_in_line 00651 * Synopsis: Get the next word on the same line as the current 00652 * line of input. If the end of line was reached 00653 * before a word was found, print an error message 00654 * and return an indication. 00655 * 00656 * Inputs: 00657 * Parameters: 00658 * func_nam Name of the function expecting the same-line 00659 * input; for use in the Error Message. 00660 * If NULL, do not issue Error Message 00661 * Global Variables: 00662 * pc Input character pointer. Saved for comparison 00663 * lineno Current input line number. Saved for comparison 00664 * 00665 * Outputs: 00666 * Returned Value: TRUE = success. Word was acquired on same line. 00667 * Global Variables: 00668 * statbuf Advanced to the next word in the input stream. 00669 * pc Advanced if no error; restored otherwise. 00670 * 00671 * Error Detection: 00672 * If no next word is gotten (i.e., we're at end-of-file), or if 00673 * one is gotten but not on the same line, the routine will 00674 * return FALSE; if func_nam is not NULL, an ERROR Message 00675 * will be issued. 00676 * Also, the values of PC LINENO and ABS_TOKEN_NO will be reset 00677 * to the positions they had when this routine was entered. 00678 * 00679 **************************************************************************** */ 00680 00681 bool get_word_in_line( char *func_nam) 00682 { 00683 signed long wlen; 00684 bool retval = TRUE; 00685 u8 *save_pc = pc; 00686 unsigned int save_lineno = lineno; 00687 unsigned int save_abs_token_no = abs_token_no; 00688 00689 /* Copy of function name, for error message */ 00690 char func_cpy[FUNC_CPY_BUF_SIZE+1]; 00691 00692 /* Do this first, in the likely event that func_nam was statbuf */ 00693 if ( func_nam != NULL ) 00694 { 00695 strncpy( func_cpy, func_nam, FUNC_CPY_BUF_SIZE); 00696 func_cpy[FUNC_CPY_BUF_SIZE] = 0; /* Guarantee a null terminator */ 00697 } 00698 00699 wlen = get_word(); 00700 if ( ( lineno != save_lineno ) || ( wlen <= 0 ) ) 00701 { 00702 abs_token_no = save_abs_token_no; 00703 lineno = save_lineno; 00704 pc = save_pc; 00705 retval = FALSE; 00706 if ( func_nam != NULL ) 00707 { 00708 tokenization_error ( TKERROR, 00709 "Operator %s expects its target on the same line\n", 00710 strupr(func_cpy)); 00711 } 00712 } 00713 return ( retval ); 00714 } 00715 00716 00717 /* ************************************************************************** 00718 * 00719 * Function name: get_rest_of_line 00720 * Synopsis: Get all the remaining text on the same line as 00721 * the current line of input. If there is no text 00722 * (not counting whitespace) before the end of line, 00723 * return an indication. 00724 * 00725 * Inputs: 00726 * Parameters: NONE 00727 * Global Variables: 00728 * pc Input character pointer. Saved for restoration 00729 * lineno Current input line number. Saved for comparison 00730 * 00731 * Outputs: 00732 * Returned Value: TRUE = success. Text was acquired on same line. 00733 * Global Variables: 00734 * statbuf Contains the text found in the input stream. 00735 * pc Advanced to end of line or of whitespace, if 00736 * no error; restored otherwise. 00737 * lineno Preserved if no error; otherwise, restored. 00738 * abs_token_no Restored if error; otherwise, advanced as normal. 00739 * 00740 * Error Detection: 00741 * Routine will return FALSE if no text is gotten on the same line. 00742 * 00743 **************************************************************************** */ 00744 00745 bool get_rest_of_line( void) 00746 { 00747 bool retval = FALSE; 00748 u8 *save_pc = pc; 00749 unsigned int save_lineno = lineno; 00750 unsigned int save_abs_token_no = abs_token_no; 00751 00752 if ( INVERSE( skip_ws() ) ) 00753 { 00754 if ( lineno == save_lineno ) 00755 { 00756 signed long wlen = get_until('\n'); 00757 if ( wlen > 0 ) retval = TRUE; 00758 }else{ 00759 abs_token_no = save_abs_token_no; 00760 lineno = save_lineno; 00761 pc = save_pc; 00762 } 00763 } 00764 return( retval); 00765 } 00766 00767 00768 /* ************************************************************************** 00769 * 00770 * Function name: warn_unterm 00771 * Synopsis: Message for "Unterminated ..." something 00772 * Show saved line-number, where the "something" started, 00773 * and the definition, if any, in which it occurred. 00774 * 00775 * Inputs: 00776 * Parameters: 00777 * severity Type of error/warning message to display 00778 * usually either WARNING or TKERROR 00779 * something String to print after "Unterminated" 00780 * saved_lineno Line-Number where the "something" started 00781 * Global Variables: 00782 * lineno Saved, then restored. 00783 * last_colon_defname Used only if unterm_is_colon is TRUE; 00784 * Local Static Variables: 00785 * unterm_is_colon See 07 Mar 2006 entry under Rev'n History 00786 * 00787 * Outputs: 00788 * Returned Value: NONE 00789 * Global Variables: 00790 * lineno Saved, then restored. 00791 * Local Static Variables: 00792 * unterm_is_colon Reset to FALSE 00793 * Printout: 00794 * Warning or Error message 00795 * 00796 * Revision History: 00797 * Updated Mon, 06 Mar 2006 by David L. Paktor 00798 * Added call to in_last_colon() 00799 * Updated Tue, 07 Mar 2006 by David L. Paktor 00800 * Call to in_last_colon() works okay in most cases except for 00801 * when the "something" is a Colon Definition; there, it 00802 * results in the phrase: ... Definition in definition of ... 00803 * which is awkward. To eliminate that, I am introducing 00804 * a Local Static Variable flag called unterm_is_colon 00805 * which will be set only in the appropriate place and 00806 * re-cleared here. It's a retro-fit, of course; it could 00807 * have been a parameter had the need for it occurred when 00808 * this routine was first constructed... 00809 * 00810 **************************************************************************** */ 00811 00812 static bool unterm_is_colon = FALSE; 00813 void warn_unterm( int severity, char *something, unsigned int saved_lineno) 00814 { 00815 unsigned int tmp = lineno; 00816 lineno = saved_lineno; 00817 if ( unterm_is_colon ) 00818 { 00819 tokenization_error( severity, "Unterminated %s of %s\n", 00820 something, strupr( last_colon_defname) ); 00821 unterm_is_colon = FALSE; 00822 }else{ 00823 tokenization_error( severity, "Unterminated %s", something); 00824 in_last_colon(); 00825 } 00826 lineno = tmp; 00827 } 00828 00829 /* ************************************************************************** 00830 * 00831 * Function name: warn_if_multiline 00832 * Synopsis: Test for "Multi-line ..." something and issue WARNING 00833 * Show saved line-number, where the "something" started 00834 * 00835 * Inputs: 00836 * Parameters: 00837 * something String to print after "Unterminated" 00838 * start_lineno Line-Number where the "something" started 00839 * Global Variables: 00840 * lineno Line-Number where we are now 00841 * iname Input file name, to satisfy ...where_started() 00842 * (Not crossing any actual file boundary.) 00843 * report_multiline TRUE = go ahead with the message 00844 * 00845 * Outputs: 00846 * Returned Value: NONE 00847 * Global Variables: 00848 * report_multiline Restored to TRUE. 00849 * 00850 * Error Detection: 00851 * Only issue message if the current lineno doesn't equal 00852 * the start_lineno 00853 * 00854 * Process Explanation: 00855 * The directive "multi-line" allows the user to specify that 00856 * the next "Multi-line ..." something is intentional, and 00857 * will cause its warning to be suppressed. It remains in 00858 * effect until it's "used"; afterwards, it's reset. 00859 * 00860 **************************************************************************** */ 00861 00862 void warn_if_multiline( char *something, unsigned int start_lineno ) 00863 { 00864 if ( report_multiline && ( start_lineno != lineno ) ) 00865 { 00866 tokenization_error( WARNING, "Multi-line %s, started", something); 00867 where_started( iname, start_lineno); 00868 } 00869 report_multiline = TRUE; 00870 } 00871 00872 00873 /* ************************************************************************** 00874 * 00875 * Function name: string_remark 00876 * Synopsis: Suspend string parsing past end of line and 00877 * whitespace at start of the new line. 00878 * 00879 * Inputs: 00880 * Parameters: 00881 * errmsg_txt Text to be used for error-message. 00882 * Global Variables: 00883 * pc Input-source Scanning pointer 00884 * 00885 * Outputs: 00886 * Returned Value: NONE 00887 * Global Variables: 00888 * pc Will point to first non-blank in new line 00889 * 00890 * Error Detection: 00891 * The return value of the skip_until() or skip_ws() routine 00892 * will indicate if PC goes past END. Issue a WARNING. 00893 * The calling routine will handle things from there. 00894 * 00895 **************************************************************************** */ 00896 00897 static void string_remark(char *errmsg_txt) 00898 { 00899 unsigned int sav_lineno = lineno; 00900 bool eof = skip_until('\n'); 00901 if ( ! eof ) 00902 { 00903 eof = skip_ws(); 00904 } 00905 if ( eof ) 00906 { 00907 warn_unterm(WARNING, errmsg_txt, sav_lineno); 00908 } 00909 00910 } 00911 00912 00913 /* Convert the given string to a number in the supplied base */ 00914 /* Allow -- and ignore -- embedded periods. */ 00915 /* The endptr param represents a pointer that will be updated 00916 * with the address of the first non-numeric character encountered, 00917 * (unless it is a NULL, in which case it is ignored). 00918 */ 00919 /* There is no test for a completely invalid string; 00920 * the calling routine is responsible for ascertaining 00921 * the validity of the string being passed. 00922 */ 00923 static long parse_number(u8 *start, u8 **endptr, int lbase) 00924 { 00925 long val = 0; 00926 bool negative = FALSE ; 00927 int curr; 00928 u8 *nptr=start; 00929 00930 curr = *nptr; 00931 if (curr == '-') 00932 { 00933 negative = TRUE ; 00934 nptr++; 00935 } 00936 00937 for (curr = *nptr; (curr = *nptr); nptr++) { 00938 if ( curr == '.' ) 00939 continue; 00940 if ( curr >= '0' && curr <= '9') 00941 curr -= '0'; 00942 else if (curr >= 'a' && curr <= 'f') 00943 curr += 10 - 'a'; 00944 else if (curr >= 'A' && curr <= 'F') 00945 curr += 10 - 'A'; 00946 else 00947 break; 00948 00949 if (curr >= lbase) 00950 break; 00951 00952 val *= lbase; 00953 val += curr; 00954 } 00955 00956 #ifdef DEBUG_SCANNER 00957 if (curr) 00958 printf( "%s:%d: warning: couldn't parse number '%s' (%d/%d)\n", 00959 iname, lineno, start,curr,lbase); 00960 #endif 00961 00962 if (endptr) 00963 *endptr=nptr; 00964 00965 if (negative) 00966 { 00967 val = -val; 00968 } 00969 return val; 00970 } 00971 00972 /* ************************************************************************** 00973 * 00974 * Function name: add_byte_to_string 00975 * Synopsis: Add the given byte (or character) to the string 00976 * being accumulated in statbuf, but protect 00977 * against a buffer overflow. 00978 * 00979 * Inputs: 00980 * Parameters: 00981 * nu_byte The given character to be added 00982 * walk Pointer to pointer to the position 00983 * in statbuf where the character 00984 * is to be placed 00985 * Global Variables: 00986 * statbuf Buffer where the string is accumulated 00987 * Macros: 00988 * GET_BUF_MAX Size of the buffer 00989 * 00990 * Outputs: 00991 * Returned Value: NONE 00992 * Supplied Pointers: 00993 * **walk Given character is placed here 00994 * *walk Incremented in any case 00995 * 00996 * Error Detection: 00997 * If walk has reached end of string buffer, do not place 00998 * the character, but continue to increment walk . 00999 * Calling routine will detect overflow. 01000 * 01001 **************************************************************************** */ 01002 01003 static void add_byte_to_string( u8 nu_byte, u8 **walk ) 01004 { 01005 if ( *walk - statbuf < GET_BUF_MAX ) 01006 { 01007 **walk = nu_byte; 01008 } 01009 (*walk)++; 01010 } 01011 01012 /* ************************************************************************** 01013 * 01014 * Function name: c_string_escape 01015 * Synopsis: Process C-style escape syntax in strings 01016 * 01017 * Inputs: 01018 * Parameters: 01019 * walk Pointer to pointer to area into 01020 * which to put acquired values 01021 * Global Variables: 01022 * pc Input-source Scanning pointer 01023 * 01024 * Outputs: 01025 * Returned Value: NONE 01026 * Global Variables: 01027 * pc Point to last character processed. 01028 * Supplied Pointers: 01029 * *walk Advanced by number of bytes acquired 01030 * 01031 * Error Detection: 01032 * WARNING conditions. See under "Process Explanation" below. 01033 * 01034 * Process Explanation: 01035 * Start with PC pointing to the first character to process 01036 * i.e., after the backslash. 01037 * We recognize newline, tab and numbers 01038 * A digit-string in the current base can be converted to a number. 01039 * The first non-numeric character ends the numeric sequence 01040 * and gets swallowed up. 01041 * If the number exceeds the size of a byte, use the truncated 01042 * value and issue a WARNING. 01043 * If the first character in the "digit"-string was non-numeric, 01044 * use the character literally and issue a WARNING. 01045 * If the character that ended the numeric sequence is a quote, 01046 * it might be the end of the string, or the start of a 01047 * special-character or even of an "( ... ) hex-sequence, 01048 * so don't swallow it up. 01049 * 01050 * Still to be done: 01051 * Better protection against PC pointer-over-run past END. 01052 * Currently, this works, but it's held together by threads: 01053 * Because init_stream forces a null-byte at the end of 01054 * the input buffer, parse_number() exits immediately upon 01055 * encountering it. This situation could be covered more 01056 * robustly... 01057 * 01058 **************************************************************************** */ 01059 01060 static void c_string_escape( u8 **walk) 01061 { 01062 char c = *pc; 01063 u8 val; 01064 /* We will come out of this "switch" statement 01065 * with a value for val and a decision 01066 * as to whether to write it. 01067 */ 01068 bool write_val = TRUE; 01069 01070 switch (c) 01071 { 01072 case 'n': 01073 /* newline */ 01074 val = '\n'; 01075 break; 01076 case 't': 01077 /* tab */ 01078 val = '\t'; 01079 break; 01080 default: 01081 01082 /* Digit-string? Convert it to a number, using the current base. 01083 * The first non-numeric character ends the numeric sequence 01084 * and gets swallowed up. 01085 * If the number exceeds the size of a byte, use the truncated 01086 * value and issue a WARNING. 01087 * If the first character in the "digit"-string was non-numeric, 01088 * use the character literally and issue a WARNING. 01089 */ 01090 01091 /* 01092 * If the sequence ender is a quote, it might be the end of 01093 * the string, or the start of a special-character or even 01094 * of an "( ... ) hex-sequence, so don't swallow it up. 01095 */ 01096 { 01097 long lval; 01098 u8 *sav_pc = pc; 01099 lval=parse_number(pc, &pc, base); 01100 val = (u8)lval; 01101 #ifdef DEBUG_SCANNER 01102 if (verbose) 01103 printf( "%s:%d: debug: escape code " 01104 "0x%x\n",iname, lineno, val); 01105 #endif 01106 if ( lval > 0x0ff ) 01107 { 01108 tokenization_error ( WARNING, 01109 "Numeric String after \\ overflows byte. " 01110 "Using 0x%02x.\n", val); 01111 } 01112 01113 if ( pc == sav_pc ) 01114 { 01115 /* NOTE: Here, PC hasn't been advanced past its 01116 * saved value, so we can count on C remaining 01117 * unchanged since the start of the routine. 01118 */ 01119 /* Don't use the null-byte at the end of the buffer */ 01120 if ( ( pc >= end ) 01121 /* or a sequence-ending quote. */ 01122 || ( c == '"' ) ) 01123 { 01124 write_val = FALSE; 01125 }else{ 01126 /* In the WARNING message, print the character 01127 * if it's printable or show it in hex 01128 * if it's not. 01129 */ 01130 if ( (c > 0x20 ) && ( c <= 0x7e) ) 01131 { 01132 tokenization_error ( WARNING, 01133 "Unrecognized character, %c, " 01134 "after \\ in string. " 01135 "Using it literally.\n", c); 01136 }else{ 01137 tokenization_error ( WARNING, 01138 "Unrecognized character, 0x%02x, " 01139 "after \\ in string. " 01140 "Using it literally.\n", c); 01141 } 01142 val = c; 01143 } 01144 } 01145 /* NOTE: Here, however, PC may have been advanced... */ 01146 /* Don't swallow the sequence-ender if it's a quote. */ 01147 if ( *pc == '"' ) 01148 { 01149 pc--; 01150 } 01151 01152 } /* End of the "default" clause */ 01153 } /* End of the "switch" statement */ 01154 01155 if ( write_val ) add_byte_to_string( val, walk ); 01156 01157 } 01158 01159 /* ************************************************************************** 01160 * 01161 * Function name: get_sequence 01162 * Synopsis: Process the Hex-Number option in strings 01163 * Protect against PC pointer-over-run past END. 01164 * 01165 * Inputs: 01166 * Parameters: 01167 * **walk Pointer to pointer to area into which 01168 * to put acquired values 01169 * Global Variables: 01170 * pc Input-source Scanning pointer 01171 * end End of input-source buffer 01172 * 01173 * Outputs: 01174 * Returned Value: TRUE = "Normal Completion" (I.e., not EOF) 01175 * Global Variables: 01176 * pc Points at terminating close-paren, or END 01177 * lineno Input File Line-Number Counter, may be incr'd 01178 * Supplied Pointers: 01179 * *walk Advanced by number of values acquired 01180 * 01181 * Error Detection: 01182 * End-of-file encountered before end of hex-sequence: 01183 * Issue a Warning, conclude processing, return FALSE. 01184 * 01185 * Process Explanation: 01186 * SETUP and RULES: 01187 * Start with PC pointing to the first character 01188 * after the '(' (Open-Paren) 01189 * Bytes are gathered from digits in pairs, except 01190 * when separated they are treated singly. 01191 * Allow a backslash in the middle of the sequence 01192 * to skip to the end of the line and past the 01193 * whitespace at the start of the next line, 01194 * i.e., it acts as a comment-escape. 01195 * 01196 * INITIALIZE: 01197 * PV_indx = 0 01198 * Set return-indicator to "Abnormal Completion" 01199 * Ready_to_Parse = FALSE 01200 * Stuff NULL into PVAL[2] 01201 * WHILE PC is less than END 01202 * Pick up character at PC into Next_Ch 01203 * IF Next_Ch is close-paren : 01204 * Set return-indicator to "Normal Completion". 01205 * Done! Break out of loop. 01206 * ENDIF 01207 * IF comment-escape behavior (controlled by means of a 01208 * command-line switch) is allowed 01209 * IF Next_Ch is backslash : 01210 * Skip to end-of line, skip whitespace. 01211 * If that makes PC reach END : WARNING message. 01212 * (Don't need to break out of loop; 01213 * normal test will terminate.) 01214 * CONTINUE Loop. 01215 * (Don't increment PC; PC is already at right place). 01216 * ENDIF 01217 * ENDIF 01218 * IF Next_Ch is a valid Hex-Digit character : 01219 * Stuff it into PVAL[PV_indx] 01220 * IF (PV_indx is 0) : 01221 * Increment PV_indx 01222 * ELSE 01223 * Set Ready_to_Parse to TRUE 01224 * ENDIF 01225 * ELSE 01226 * IF Next_Ch is a New-Line, increment Line Number counter 01227 * IF (PV_indx is 1) : 01228 * Stuff NULL into PVAL[1] 01229 * Set Ready_to_Parse to TRUE 01230 * ENDIF 01231 * ENDIF 01232 * IF Ready_to_Parse 01233 * Parse PVAL 01234 * Stuff into WALK 01235 * Reset PV_indx to zero 01236 * Reset Ready_to_Parse to FALSE 01237 * ENDIF 01238 * Increment PC 01239 * REPEAT 01240 * Return with Normal/Abnormal completion indicator 01241 * 01242 **************************************************************************** */ 01243 01244 static bool get_sequence(u8 **walk) 01245 { 01246 int pv_indx = 0; 01247 bool retval = FALSE; /* "Abnormal Completion" indicator */ 01248 bool ready_to_parse = FALSE; 01249 char next_ch; 01250 char pval[3]; 01251 01252 #ifdef DEBUG_SCANNER 01253 printf("%s:%d: debug: hex field:", iname, lineno); 01254 #endif 01255 pval[2]=0; 01256 01257 while ( pc < end ) 01258 { 01259 next_ch = *pc; 01260 if ( next_ch == ')' ) 01261 { 01262 retval = TRUE; 01263 break; 01264 } 01265 if ( hex_remark_escape ) 01266 { 01267 if ( next_ch == '\\' ) 01268 { 01269 string_remark("string hex-sequence remark"); 01270 continue; 01271 } 01272 } 01273 if ( isxdigit(next_ch) ) 01274 { 01275 pval[pv_indx] = next_ch; 01276 if ( pv_indx == 0 ) 01277 { 01278 pv_indx++; 01279 }else{ 01280 ready_to_parse = TRUE; 01281 } 01282 }else{ 01283 if ( next_ch == '\n' ) lineno++ ; 01284 if ( pv_indx != 0 ) 01285 { 01286 pval[1] = 0; 01287 ready_to_parse = TRUE; 01288 } 01289 } 01290 if ( ready_to_parse ) 01291 { 01292 u8 val = parse_number(pval, NULL, 16); 01293 *((*walk)++)=val; 01294 #ifdef DEBUG_SCANNER 01295 printf(" %02x",val); 01296 #endif 01297 pv_indx = 0; 01298 ready_to_parse = FALSE; 01299 } 01300 pc++; 01301 } 01302 #ifdef DEBUG_SCANNER 01303 printf("\n"); 01304 #endif 01305 return ( retval ); 01306 } 01307 01308 /* ************************************************************************** 01309 * 01310 * Return the length of the string. 01311 * Pack the string, without the terminating '"' (Quote), into statbuf 01312 * Protect against PC pointer-over-run past END. 01313 * Enable Quote-Backslash as a String-Remark Escape. 01314 * Allowability of Quote-Backslash as a String-Remark is under control 01315 * of a command-line switch (string_remark_escape ). 01316 * Allowability of C-style escape characters is under control 01317 * of a command-line switch ( c_style_string_escape ). 01318 * 01319 * Truncate string to size of Forth Packed-String (i.e., uses leading 01320 * count-byte, so limited to 255, number that one byte can express) 01321 * unless the string is being gathered for a Message or is being 01322 * consumed for purposes of ignoring it, in either of which case 01323 * that limit need not be enforced. Parameter "pack_str" controls 01324 * this: TRUE if limit needs to be enforced. 01325 * 01326 * Issue WARNING if string length gets truncated. 01327 * Issue WARNING if string crosses line. 01328 * The issuance of the Multi-line WARNING is under control of a 01329 * one-shot directive similar to OVERLOAD , called MULTI-LINE 01330 * 01331 * Still to be decided: 01332 * Do we want to bring the allowability of strings crossing 01333 * lines under control of a command-line switch? 01334 * 01335 ************************************************************************** */ 01336 01337 static signed long get_string( bool pack_str) 01338 { 01339 u8 *walk; 01340 unsigned long len; 01341 char c; 01342 bool run = TRUE; 01343 unsigned long start_lineno = lineno; /* For warning message */ 01344 01345 /* 01346 * Bump past the single whitespace character that delimits 01347 * the command -- e.g., ." or " or suchlike -- that 01348 * starts the string. Allow new-line to be a command- 01349 * -delimiting whitespace character. Regard any sub- 01350 * sequent whitespace characters as part of the string 01351 */ 01352 if (*pc++=='\n') lineno++; 01353 01354 got_until_eof = TRUE ; 01355 01356 walk=statbuf; 01357 while (run) { 01358 switch ((c=*pc)) 01359 { 01360 /* Standard use of '"' (Quote) for special-char escape */ 01361 case '\"': 01362 /* Skip the '"' (Quote) */ 01363 pc++; 01364 /* End of the buffer also ends the string cleanly */ 01365 if ( pc >= end ) 01366 { 01367 run = FALSE; 01368 got_until_eof = FALSE ; 01369 break; 01370 } 01371 /* Pick up the next char after the '"' (Quote) */ 01372 c=*pc; 01373 switch (c) 01374 { 01375 case '(': 01376 pc++; /* skip the '(' */ 01377 run = get_sequence(&walk); 01378 break; 01379 01380 case 'n': 01381 add_byte_to_string( '\n', &walk); 01382 break; 01383 case 'r': 01384 add_byte_to_string( '\r', &walk); 01385 break; 01386 case 't': 01387 add_byte_to_string( '\t', &walk); 01388 break; 01389 case 'f': 01390 add_byte_to_string( '\f', &walk); 01391 break; 01392 case 'l': 01393 add_byte_to_string( '\n', &walk); 01394 break; 01395 case 'b': 01396 add_byte_to_string( 0x08, &walk); 01397 break; 01398 case '!': 01399 add_byte_to_string( 0x07, &walk); 01400 break; 01401 case '^': 01402 pc++; /* Skip the up-arrow (Caret) */ 01403 add_byte_to_string( *pc & 0x1f , &walk); 01404 break; 01405 /* We're done after any of the whitespace 01406 * characters follows a quote. 01407 */ 01408 case ' ': 01409 case '\t': 01410 /* Advance past the terminating whitespace 01411 * character, except newline. 01412 * Let get_word() handle that. 01413 */ 01414 pc++; 01415 case '\n': 01416 run=FALSE; 01417 got_until_eof = FALSE ; 01418 break; 01419 default: 01420 /* Control allowability of Quote-Backslash 01421 * as a String-Remark by means of a 01422 * command-line switch. 01423 */ 01424 if ( string_remark_escape ) 01425 { 01426 if ( c == '\\' ) 01427 { 01428 string_remark("string-escape remark"); 01429 /* The first non-blank in the new line 01430 * has not been processed yet. 01431 * Back up to allow it to be. 01432 */ 01433 pc--; 01434 break; 01435 } 01436 } 01437 add_byte_to_string( c, &walk); 01438 } 01439 break; 01440 case '\n': 01441 /* Allow strings to cross lines. Include the 01442 * newline in the string. Account for it. 01443 */ 01444 lineno++; 01445 default: 01446 /* Control allowability of C-style escape-character 01447 * syntax by means of a command-line switch. 01448 */ 01449 if ( c_style_string_escape ) 01450 { 01451 if ( c == '\\' ) 01452 { 01453 pc++; 01454 c_string_escape(&walk ); 01455 break; 01456 } 01457 } 01458 add_byte_to_string( c, &walk); 01459 } 01460 /* Advance past the char processed, unless we're done. */ 01461 if ( run ) pc++; 01462 /* Done if we hit end of file before string was concluded */ 01463 if ( pc >= end ) 01464 { 01465 run = FALSE; 01466 if ( got_until_eof ) 01467 { 01468 warn_unterm( WARNING, "string", start_lineno); 01469 /* Prevent multiple messages for one error */ 01470 got_until_eof = FALSE; 01471 } 01472 } 01473 } 01474 01475 warn_if_multiline( "string", start_lineno); 01476 01477 len = walk - statbuf; 01478 if (len >= GET_BUF_MAX ) 01479 { 01480 tokenization_error ( TKERROR, 01481 "get_string buffer overflow. Max is %d\n.", GET_BUF_MAX-1 ); 01482 len = GET_BUF_MAX-1; 01483 } 01484 #ifdef DEBUG_SCANNER 01485 if (verbose) 01486 printf("%s:%d: debug: scanned string: '%s'\n", 01487 iname, lineno, statbuf); 01488 #endif 01489 if ( pack_str && (len > STRING_LEN_MAX) ) 01490 { 01491 tokenization_error ( WARNING, 01492 "String length being truncated to %d.\n", STRING_LEN_MAX ); 01493 len = STRING_LEN_MAX; 01494 } 01495 statbuf[len] = 0; 01496 01497 return len ; 01498 } 01499 01500 01501 /* ************************************************************************** 01502 * 01503 * Function name: handle_user_message 01504 * Synopsis: Collect a user-generated tokenization-time message; 01505 * either print it or discard it. Shared code 01506 * for user_message() and skip_user_message() 01507 * 01508 * Inputs: 01509 * Parameters: 01510 * delim End-of-string delimiter character. 01511 * If it's a double-quote ("), we will use 01512 * the get-string() routine, with all 01513 * its options, to collect the message. 01514 * Otherwise, we'll capture plain text from 01515 * the input stream. 01516 * print_it TRUE if we should print the message 01517 * Local Static Variables: 01518 * got_until_eof TRUE if reached end of buffer before delim. 01519 * Global Variables: 01520 * lineno Save, then restore. 01521 * 01522 * Outputs: 01523 * Returned Value: NONE 01524 * Global Variables: 01525 * statbuf The string will be collected in here 01526 * 01527 * Printout (if print_it is TRUE): 01528 * The string, with new-line tacked on, will be printed from 01529 * the tokenization_error() routine as a MESSAGE. 01530 * The line-number will be shown as of the origin of the message 01531 * 01532 * Error Detection: 01533 * Error-reports will be printed regardless of print_it param. 01534 * If delimiter was not found, show "Unterminated" warning message. 01535 * If delimiter was " (double-quote), the get_string() routine 01536 * already checked for a multi-line construct; if delimiter is 01537 * a new-line, then a multi-line construct is impossible. 01538 * otherwise, we will do the multi-line check here. 01539 * 01540 **************************************************************************** */ 01541 01542 static void handle_user_message( char delim, bool print_it ) 01543 { 01544 signed long wlen; 01545 unsigned int start_lineno = lineno; 01546 unsigned int multiline_start = lineno; /* For warning message */ 01547 bool check_multiline = FALSE; 01548 const char *ug_msg = "user-generated message"; 01549 01550 if ( delim == '"' ) 01551 { 01552 wlen = get_string( FALSE); 01553 }else{ 01554 /* 01555 * When the message-delimiter is a new-line, and the 01556 * command-delimiter was a new-line, it means the 01557 * string length is zero; we won't bump the PC. 01558 * Otherwise, we will honor the convention we extend 01559 * to .( whereby, if the command is delimited 01560 * by a new-line, we allow the string to begin 01561 * on the next line. 01562 */ 01563 if ( delim == '\n' ) 01564 { 01565 if ( *pc != '\n') pc++; 01566 }else{ 01567 if (*pc++=='\n') lineno++; 01568 multiline_start = lineno; 01569 check_multiline = TRUE; 01570 } 01571 wlen = get_until( delim ); 01572 } 01573 01574 if ( print_it ) 01575 { 01576 unsigned int tmp_lineno = lineno; 01577 lineno = start_lineno; 01578 /* Don't add a new-line to body of the message. 01579 * Routine already takes care of that. 01580 * Besides, buffer might be full... 01581 */ 01582 tokenization_error( MESSAGE, statbuf); 01583 lineno = tmp_lineno; 01584 } 01585 01586 if ( got_until_eof ) /* Crude but effective retrofit... */ 01587 { 01588 warn_unterm(WARNING, (char *)ug_msg, start_lineno); 01589 }else{ 01590 if ( check_multiline ) 01591 { 01592 warn_if_multiline( (char *)ug_msg, multiline_start); 01593 } 01594 } 01595 } 01596 01597 /* ************************************************************************** 01598 * 01599 * Function name: user_message 01600 * Synopsis: Collect a user-generated message and 01601 * print it at tokenization-time. 01602 * 01603 * Tokenizer directive (either mode): 01604 * Synonyms String Delimiter 01605 * [MESSAGE] #MESSAGE [#MESSAGE] end-of-line 01606 * #MESSAGE" " 01607 * "Tokenizer-Escape" mode directive String Delimiter 01608 * .( ) 01609 * ." " 01610 * 01611 * Inputs: 01612 * Parameter is the "parameter field" of the TIC entry, which 01613 * was initialized to the end-of-string delimiter character. 01614 * 01615 * Outputs: 01616 * Returned Value: NONE 01617 * Printout: User-message, parsed from input. 01618 * 01619 * Extraneous Remarks: 01620 * We would have preferred to simply use the "character value" 01621 * aspect of the union, but we found portability issues 01622 * between big- and little- -endian processors, so we still 01623 * have to recast its type here. 01624 * 01625 **************************************************************************** */ 01626 01627 void user_message( tic_param_t pfield ) 01628 { 01629 char delim = (char)pfield.deflt_elem ; 01630 handle_user_message( delim, TRUE); 01631 } 01632 01633 /* ************************************************************************** 01634 * 01635 * Function name: skip_user_message 01636 * Synopsis: Collect a user-generated message and discard it. 01637 * Used when ignoring a Conditional section. 01638 * 01639 * Tokenizer directive (either mode): 01640 * Synonyms String Delimiter 01641 * [MESSAGE] #MESSAGE [#MESSAGE] end-of-line 01642 * #MESSAGE" " 01643 * "Tokenizer-Escape" mode directive String Delimiter 01644 * .( ) 01645 * ." " 01646 * 01647 * Inputs: 01648 * Parameters: 01649 * pfield "Parameter field" of the TIC entry, which 01650 * was initialized to the delimiter. 01651 * 01652 * Outputs: 01653 * Returned Value: NONE 01654 * Printout: NONE 01655 * 01656 **************************************************************************** */ 01657 01658 void skip_user_message( tic_param_t pfield ) 01659 { 01660 char delim = (char)pfield.deflt_elem ; 01661 handle_user_message( delim, FALSE); 01662 } 01663 01664 01665 01666 /* ************************************************************************** 01667 * 01668 * Function name: get_number 01669 * Synopsis: If the word retrieved from the input stream is a 01670 * valid number (under the current base) convert it. 01671 * Return an indication if it was not. 01672 * 01673 * Inputs: 01674 * Parameters: 01675 * *result Pointer to place to return the number 01676 * Global Variables: 01677 * statbuf The word just read that is to be converted. 01678 * base The current numeric-interpretation base. 01679 * 01680 * Outputs: 01681 * Returned Value: TRUE = Input was a valid number 01682 * Supplied Pointers: 01683 * *result The converted number, if valid 01684 * otherwise undefined 01685 * 01686 * Revision History: 01687 * Updated Mon, 28 Mar 2005 by David L. Paktor 01688 * Always use the current base. 01689 * Reversed sense of return-flag. 01690 * 01691 **************************************************************************** */ 01692 01693 bool get_number( long *result) 01694 { 01695 u8 *until; 01696 long val; 01697 bool retval = FALSE ; 01698 01699 val = parse_number(statbuf, &until, base); 01700 01701 #ifdef DEBUG_SCANNER 01702 printf("%s:%d: debug: parsing number: base 0x%x, val 0x%lx, " 01703 "processed %ld of %ld bytes\n", iname, lineno, 01704 base, val,(size_t)(until-statbuf), strlen((char *)statbuf)); 01705 #endif 01706 01707 /* If number-parsing ended before the end of the input word, 01708 * then the input word was not a valid number. 01709 */ 01710 if (until==(statbuf+strlen((char *)statbuf))) 01711 { 01712 *result=val; 01713 retval = TRUE; 01714 } 01715 01716 return ( retval ); 01717 } 01718 01719 /* ************************************************************************** 01720 * 01721 * Function name: deliver_number 01722 * Synopsis: Deliver the supplied number according to the 01723 * state of the tokenizer: 01724 * In normal tokenization mode, emit it as an 01725 * FCode literal. 01726 * In "Tokenizer-Escape" mode, push it onto 01727 * the Data Stack. 01728 * 01729 * Inputs: 01730 * Parameters: 01731 * numval The number, verified to be valid. 01732 * Global Variables: 01733 * in_tokz_esc TRUE if tokenizer is in "Tokenizer Escape" mode. 01734 * 01735 * Outputs: 01736 * Returned Value: NONE 01737 * Items Pushed onto Data-Stack: 01738 * Top: The number, if in_tokz_esc was TRUE 01739 * FCode Output buffer: 01740 * If in_tokz_esc was FALSE, a b(lit) token will be written, 01741 * followed by the number. 01742 * 01743 **************************************************************************** */ 01744 01745 static void deliver_number( long numval) 01746 { 01747 if ( in_tokz_esc ) 01748 { 01749 dpush( numval ); 01750 } else { 01751 emit_literal(numval); 01752 } 01753 } 01754 /* ************************************************************************** 01755 * 01756 * Function name: handle_number 01757 * Synopsis: Convert the word just retrieved from the input stream 01758 * to a number. 01759 * Indicate whether the string was a valid number and 01760 * deliver it, as appropriate. 01761 * 01762 * Inputs: 01763 * Parameters: NONE 01764 * Global Variables: 01765 * statbuf The word that was just read, and to be converted. 01766 * 01767 * Outputs: 01768 * Returned Value: TRUE = Input string was a valid number 01769 * If input string was a valid number, the converted number will 01770 * be delivered, as appropriate, by deliver_number(). 01771 * 01772 **************************************************************************** */ 01773 01774 static bool handle_number( void ) 01775 { 01776 bool retval ; 01777 long numval; 01778 01779 retval = get_number( &numval ); 01780 if ( retval ) 01781 { 01782 deliver_number( numval ); 01783 } 01784 01785 return ( retval ); 01786 } 01787 01788 /* ************************************************************************** 01789 * 01790 * Function name: ascii_right_number 01791 * Synopsis: Convert a character sequence to a number, justified 01792 * toward the right (i.e., the low-order bytes) and 01793 * deliver it, as appropriate. 01794 * 01795 * Inputs: 01796 * Parameters: 01797 * in_str The input string 01798 * 01799 * Outputs: 01800 * Returned Value: NONE 01801 * The converted number will be delivered by deliver_number(). 01802 * 01803 * Process Explanation: 01804 * The last four characters in the sequence will become the number. 01805 * If there are fewer than four, they will fill the low-order part 01806 * of the number. 01807 * Example: PCIR is converted to h# 50434952 01808 * CPU is converted to h# 00435055 01809 * and 01810 * LotsOfStuff is equivalent to a# tuff 01811 * and is converted to h# 74756666 01812 * 01813 **************************************************************************** */ 01814 01815 static void ascii_right_number( char *in_str) 01816 { 01817 u8 nxt_ch; 01818 char *str_ptr = in_str; 01819 long numval = 0; 01820 01821 for ( nxt_ch = (u8)*str_ptr ; 01822 ( nxt_ch = (u8)*str_ptr ) != 0 ; 01823 str_ptr++ ) 01824 { 01825 numval = ( numval << 8 ) + nxt_ch ; 01826 } 01827 deliver_number( numval ); 01828 } 01829 01830 01831 /* ************************************************************************** 01832 * 01833 * Function name: ascii_left_number 01834 * Synopsis: Similar to ascii_right_number() except justified 01835 * toward the left (i.e., the high-order bytes). 01836 * 01837 * 01838 * Inputs: 01839 * Parameters: 01840 * in_str The input string 01841 * 01842 * Outputs: 01843 * Returned Value: NONE 01844 * The converted number will be delivered by deliver_number(). 01845 * 01846 * Process Explanation: 01847 * If there are fewer than four characters in the sequence, they 01848 * will fill the high-order part of the number. 01849 * CPU is converted to h# 43505500 01850 * In all other respects, similar to ascii_right_number() 01851 * 01852 **************************************************************************** */ 01853 01854 static void ascii_left_number( char *in_str) 01855 { 01856 u8 nxt_ch; 01857 char *str_ptr = in_str; 01858 long numval = 0; 01859 int shift_amt = 24; 01860 bool shift_over = FALSE ; 01861 01862 for ( nxt_ch = (u8)*str_ptr ; 01863 ( nxt_ch = (u8)*str_ptr ) != 0 ; 01864 str_ptr++ ) 01865 { 01866 if ( shift_over ) numval <<= 8; 01867 if ( shift_amt == 0 ) shift_over = TRUE ; 01868 numval += ( nxt_ch << shift_amt ); 01869 if ( shift_amt > 0 ) shift_amt -= 8; 01870 } 01871 deliver_number( numval ); 01872 01873 } 01874 01875 /* ************************************************************************** 01876 * 01877 * Function name: init_scanner 01878 * Synopsis: Allocate memory the Scanner will need. 01879 * Only need to call once per program run. 01880 * 01881 **************************************************************************** */ 01882 01883 void init_scanner(void) 01884 { 01885 statbuf=safe_malloc(GET_BUF_MAX, "initting scanner"); 01886 } 01887 01888 /* ************************************************************************** 01889 * 01890 * Function name: exit_scanner 01891 * Synopsis: Free up memory the Scanner used 01892 * 01893 **************************************************************************** */ 01894 01895 void exit_scanner(void) 01896 { 01897 free(statbuf); 01898 } 01899 01900 /* ************************************************************************** 01901 * 01902 * Function name: set_hdr_flag 01903 * Synopsis: Set the state of the "headered-ness" flag to the 01904 * value given, unless over-ridden by one or both 01905 * of the "always-..." Command-Line Flags 01906 * 01907 * Inputs: 01908 * Parameters: 01909 * new_flag New setting 01910 * Global Variables: 01911 * always_headers Override HEADERLESS and make HEADERS 01912 * always_external Override HEADERLESS and HEADERS; 01913 * make EXTERNAL 01914 * 01915 * Outputs: 01916 * Returned Value: None 01917 * Local Static Variables: 01918 * hdr_flag Adjusted to new setting 01919 * 01920 * Process Explanation: 01921 * If always_headers is TRUE, and new_flag is not FLAG_EXTERNAL 01922 * then set to FLAG_HEADERS 01923 * If always_external is TRUE, set to FLAG_EXTERNAL, regardless. 01924 * (Note: always_external over-rides always_headers). 01925 * Otherwise, set to new_flag 01926 * 01927 **************************************************************************** */ 01928 01929 static void set_hdr_flag( headeredness new_flag) 01930 { 01931 headeredness new_state = new_flag; 01932 switch ( new_flag) 01933 { 01934 case FLAG_HEADERLESS: 01935 { 01936 if ( always_headers ) 01937 { new_state = FLAG_HEADERS; 01938 } 01939 /* No break. Intentional... */ 01940 } 01941 case FLAG_HEADERS: 01942 { 01943 if ( always_external ) 01944 { new_state = FLAG_EXTERNAL; 01945 } 01946 /* No break. Intentional... */ 01947 } 01948 case FLAG_EXTERNAL: 01949 break; /* Satisfy compiler's error-checking... */ 01950 /* No default needed here... */ 01951 } 01952 01953 hdr_flag = new_state; 01954 01955 } 01956 01957 01958 /* ************************************************************************** 01959 * 01960 * Function name: init_scan_state 01961 * Synopsis: Initialize various state variables for each time 01962 * a new tokenization scan is started. 01963 * 01964 * Inputs: 01965 * Parameters: NONE 01966 * 01967 * Outputs: 01968 * Returned Value: NONE 01969 * Global Variables: Initialized to: 01970 * base 0x0a (I.e., base = "decimal") 01971 * nextfcode By reset_fcode_ranges() 01972 * pci_is_last_image TRUE 01973 * incolon FALSE 01974 * Local Static Variables: 01975 * hdr_flag FLAG_HEADERLESS (unless over-ridden) 01976 * is_instance FALSE 01977 * last_colon_filename NULL 01978 * instance_filename NULL 01979 * dev_change_instance_warning TRUE 01980 * instance_definer_gap FALSE 01981 * need_to_pop_source FALSE 01982 * first_fc_starter TRUE 01983 * ret_stk_depth 0 01984 * Memory Freed 01985 * Copies of input-file name in last_colon_filename and 01986 * instance_filename , if allocated. 01987 * 01988 **************************************************************************** */ 01989 01990 void init_scan_state( void) 01991 { 01992 base = 0x0a; 01993 pci_is_last_image = TRUE; 01994 incolon = FALSE; 01995 is_instance = FALSE; 01996 set_hdr_flag( FLAG_HEADERLESS); 01997 reset_fcode_ranges(); 01998 first_fc_starter = TRUE; 01999 if ( last_colon_filename != NULL ) free( last_colon_filename); 02000 if ( instance_filename != NULL ) free( instance_filename); 02001 last_colon_filename = NULL; 02002 instance_filename = NULL; 02003 dev_change_instance_warning = TRUE; 02004 instance_definer_gap = FALSE; 02005 need_to_pop_source = FALSE; 02006 ret_stk_depth = 0; 02007 } 02008 02009 02010 /* ************************************************************************** 02011 * 02012 * Function name: collect_input_filename 02013 * Synopsis: Save a copy of the current input file name in the 02014 * given variable, for error-reporting purposes 02015 * 02016 * Inputs: 02017 * Parameters: 02018 * saved_nam Pointer to pointer for copy of name 02019 * Global Variables: 02020 * iname Current input file name 02021 * Local Static Variables: 02022 * 02023 * Outputs: 02024 * Returned Value: NONE 02025 * Supplied Pointers: 02026 * *saved_nam Copy of name 02027 * Memory Allocated 02028 * For copy of input file name 02029 * When Freed? 02030 * Subsequent call to this routine with same pointer 02031 * (Last copy made will be freed if starting a new tokenization, 02032 * otherwise, will persist until end of program.) 02033 * Memory Freed 02034 * Previous copy in same pointer. 02035 * 02036 * Process Explanation: 02037 * If there is a previous copy, and it still matches the current 02038 * input-file name, we don't need to free or re-allocate. 02039 * 02040 **************************************************************************** */ 02041 02042 static void collect_input_filename( char **saved_nam) 02043 { 02044 bool update_lcfn = TRUE; /* Need to re-allocate? */ 02045 if ( *saved_nam != NULL ) 02046 { 02047 if ( strcmp( *saved_nam, iname) == 0 ) 02048 { 02049 /* Last collected filename unchanged from iname */ 02050 update_lcfn = FALSE; 02051 }else{ 02052 free( *saved_nam); 02053 } 02054 } 02055 if ( update_lcfn ) 02056 { 02057 *saved_nam = strdup(iname); 02058 } 02059 } 02060 02061 /* ************************************************************************** 02062 * 02063 * Function name: test_in_colon 02064 * Synopsis: Error-check whether a word is being used in the 02065 * correct state, relative to being inside a colon 02066 * definition; issue a message if it's not. 02067 * 02068 * Inputs: 02069 * Parameters: 02070 * wname The name of the word in question 02071 * sb_in_colon TRUE if the name should be used inside 02072 * a colon-definition only; FALSE if 02073 * it may only be used outside of a 02074 * colon-definition. 02075 * severity Type of error/warning message to call. 02076 * usually either WARNING or TKERROR 02077 * use_instead Word the error-message should suggest be 02078 * used "instead". This may be a NULL, 02079 * in which case the "suggestion" part 02080 * of the message will simply be omitted. 02081 * Global Variables: 02082 * incolon State of the tokenization; TRUE if inside 02083 * a colon definition 02084 * 02085 * Outputs: 02086 * Returned Value: TRUE if no error. 02087 * Printout: Error messages as indicated. 02088 * 02089 * Error Detection: 02090 * If the state, relative to being inside a colon-definition, 02091 * is not what the parameter says it should be, issue a 02092 * message of the indicated severity, and return FALSE. 02093 * 02094 **************************************************************************** */ 02095 02096 static bool test_in_colon ( char *wname, 02097 bool sb_in_colon, /* "Should Be IN colon" */ 02098 int severity, 02099 char *use_instead) 02100 { 02101 bool is_wrong; 02102 bool retval = TRUE ; 02103 02104 is_wrong = BOOLVAL(( sb_in_colon != FALSE ) != ( incolon != FALSE )) ; 02105 if ( is_wrong ) 02106 { 02107 char *ui_pt1 = ""; 02108 char *ui_pt2 = ""; 02109 char *ui_pt3 = ""; 02110 retval = FALSE; 02111 if ( use_instead != NULL ) 02112 { 02113 ui_pt1 = " Use "; 02114 ui_pt2 = use_instead; 02115 ui_pt3 = " instead."; 02116 } 02117 tokenization_error ( severity, "The word %s should not be used " 02118 "%sside of a colon definition.%s%s%s\n", strupr(wname), 02119 sb_in_colon ? "out" : "in", ui_pt1, ui_pt2, ui_pt3 ); 02120 } 02121 return ( retval ); 02122 } 02123 02124 /* ************************************************************************** 02125 * 02126 * Function name: must_be_deep_in_do 02127 * Synopsis: Check that the statement in question is called 02128 * from inside the given depth of structures 02129 * of the DO ... LOOP -type (i.e., any combination 02130 * of DO or ?DO and LOOP or +LOOP ). 02131 * Show an error if it is not. 02132 * 02133 **************************************************************************** */ 02134 02135 static void must_be_deep_in_do( int how_deep ) 02136 { 02137 int functional_depth = do_loop_depth; 02138 if ( incolon ) 02139 { 02140 functional_depth -= last_colon_do_depth; 02141 } 02142 if ( functional_depth < how_deep ) 02143 { 02144 char deep_do[64] = ""; 02145 int indx; 02146 bool prefix = FALSE; 02147 02148 for ( indx = 0; indx < how_deep ; indx ++ ) 02149 { 02150 strcat( deep_do, "DO ... "); 02151 } 02152 for ( indx = 0; indx < how_deep ; indx ++ ) 02153 { 02154 if ( prefix ) 02155 { 02156 strcat( deep_do, " ... "); 02157 } 02158 strcat( deep_do, "LOOP"); 02159 prefix = TRUE; 02160 } 02161 02162 tokenization_error( TKERROR, 02163 "%s outside of %s structure", strupr(statbuf), deep_do); 02164 in_last_colon(); 02165 } 02166 02167 } 02168 02169 /* ************************************************************************** 02170 * 02171 * Function name: bump_ret_stk_depth 02172 * Synopsis: Increment or decrement the Return-Stack-Usage-Depth 02173 * counter. 02174 * 02175 * Inputs: 02176 * Parameters: 02177 * bump Amount by which to increment; 02178 * negative number to decrement. 02179 * Local Static Variables: 02180 * ret_stk_depth The Return-Stack-Usage-Depth counter 02181 * 02182 * Outputs: 02183 * Returned Value: NONE 02184 * Local Static Variables: 02185 * ret_stk_depth Incremented or decremented 02186 * 02187 * Process Explanation: 02188 * This simple-seeming function is actually a place-holder 02189 * for future expansion. Proper error-detection of 02190 * Return-Stack usage is considerably more complex than 02191 * what we are implementing here, and is deferred for a 02192 * later revision. 02193 * 02194 * Still to be done: 02195 * Full detection of whether the Return-Stack has been cleared 02196 * when required, including analysis of Return-Stack usage 02197 * within Flow-Control constructs, and before Loop elements... 02198 * 02199 * Extraneous Remarks: 02200 * Some FORTHs use a Loop-Control stack separate from the Return- 02201 * -Stack, but others use the Return-Stack to keep LOOP-control 02202 * elements. An FCode program must be portable between different 02203 * environments, and so must adhere to the restrictions listed 02204 * in the ANSI Spec: 02205 * 02206 * 3.2.3.3 Return stack 02207 * . . . . . . 02208 * A program may use the return stack for temporary storage during the 02209 * execution of a definition subject to the following restrictions: 02210 * A program shall not access values on the return stack (using R@, 02211 * R>, 2R@ or 2R>) that it did not place there using >R or 2>R; 02212 * A program shall not access from within a do-loop values placed 02213 * on the return stack before the loop was entered; 02214 * All values placed on the return stack within a do-loop shall 02215 * be removed before I, J, LOOP, +LOOP, UNLOOP, or LEAVE is 02216 * executed; 02217 * All values placed on the return stack within a definition 02218 * shall be removed before the definition is terminated 02219 * or before EXIT is executed. 02220 * 02221 **************************************************************************** */ 02222 02223 static void bump_ret_stk_depth( int bump) 02224 { 02225 ret_stk_depth += bump; 02226 } 02227 02228 02229 /* ************************************************************************** 02230 * 02231 * Function name: ret_stk_balance_rpt 02232 * Synopsis: Display a Message if usage of the Return-Stack 02233 * appears to be out of balance. 02234 * 02235 * Inputs: 02236 * Parameters: 02237 * before_what Phrase to use in Message; 02238 * if NULL, use statbuf... 02239 * clear_it TRUE if this call should also clear the 02240 * Return-Stack-Usage-Depth counter 02241 * Global Variables: 02242 * statbuf Word currently being processed 02243 * Local Static Variables: 02244 * ret_stk_depth The Return-Stack-Usage-Depth counter 02245 * 02246 * Outputs: 02247 * Returned Value: NONE 02248 * Local Static Variables: 02249 * ret_stk_depth May be cleared 02250 * 02251 * Error Detection: 02252 * Based simply on whether the Return-Stack-Usage-Depth counter 02253 * is zero. This is a weak and uncertain implementation; 02254 * therefore, the Message will be a WARNING phrased with 02255 * some equivocation. 02256 * 02257 * Process Explanation: 02258 * Proper detection of Return-Stack usage errors is considerably 02259 * more complex, and is deferred for a future revision. 02260 * 02261 * Still to be done: 02262 * Correct analysis of Return-Stack usage around Flow-Control 02263 * constructs. Consider, for instance, the following: 02264 * 02265 * blablabla >R yadayada IF R> gubble ELSE flubble R> THEN 02266 * 02267 * It is, in fact, correct, but the present scheme would 02268 * tag it as a possible error. Conversely, something like: 02269 * 02270 * blablabla >R yadayada IF R> gubble THEN 02271 * 02272 * would not get tagged, even though it is actually an error. 02273 * 02274 * The current simple scheme also does not cover Return-Stack 02275 * usage within Do-Loops or before Loop elements like I and 02276 * J or UNLOOP or LEAVE. Implementing something like that 02277 * would probably need to be integrated in with Flow-Control 02278 * constructs, and will be noted in flowcontrol.c 02279 * 02280 **************************************************************************** */ 02281 02282 static void ret_stk_balance_rpt( char *before_what, bool clear_it) 02283 { 02284 if ( ret_stk_depth != 0 ) 02285 { 02286 char *what_flow = ret_stk_depth < 0 ? "deficit" : "excess" ; 02287 char *what_phr = before_what != NULL ? before_what : strupr(statbuf); 02288 02289 tokenization_error( WARNING, 02290 "Possible Return-Stack %s before %s", what_flow, what_phr); 02291 in_last_colon(); 02292 02293 if ( clear_it ) 02294 { 02295 ret_stk_depth = 0; 02296 } 02297 } 02298 } 02299 02300 02301 /* ************************************************************************** 02302 * 02303 * Function name: ret_stk_access_rpt 02304 * Synopsis: Display a Message if an attempt to access a value 02305 * on the Return-Stack appears to occur before 02306 * one was placed there. 02307 * 02308 * Inputs: 02309 * Parameters: NONE 02310 * Global Variables: 02311 * statbuf Word currently being processed 02312 * Local Static Variables: 02313 * ret_stk_depth The Return-Stack-Usage-Depth counter 02314 * 02315 * Outputs: 02316 * Returned Value: NONE 02317 * 02318 * Error Detection: 02319 * Equivocal WARNING, based simply on whether the Return-Stack- 02320 * -Usage-Depth counter not positive. 02321 * 02322 * Process Explanation: 02323 * Proper detection is deferred... 02324 * 02325 * Still to be done: 02326 * Correct analysis of Return-Stack usage... 02327 * 02328 **************************************************************************** */ 02329 02330 static void ret_stk_access_rpt( void) 02331 { 02332 if ( ret_stk_depth <= 0 ) 02333 { 02334 tokenization_error( WARNING, 02335 "Possible Return-Stack access attempt by %s " 02336 "without value having been placed there", 02337 strupr(statbuf) ); 02338 in_last_colon(); 02339 } 02340 } 02341 02342 02343 02344 /* ************************************************************************** 02345 * 02346 * Function name: encode_file 02347 * Synopsis: Input a (presumably binary) file and encode it 02348 * as a series of strings which will be accumulated 02349 * and encoded in a manner appropriate for a property. 02350 * 02351 * Associated Tokenizer directive: encode-file 02352 * 02353 * Error Detection: 02354 * Handled by support routines. 02355 * 02356 **************************************************************************** */ 02357 02358 static void encode_file( const char *filename ) 02359 { 02360 FILE *f; 02361 size_t s; 02362 int num_encoded=0; 02363 02364 tokenization_error( INFO, "ENCODing File %s\n", filename ); 02365 02366 f = open_expanded_file( filename, "rb", "encoding"); 02367 if( f != NULL ) 02368 { 02369 while( (s=fread(statbuf, 1, STRING_LEN_MAX, f)) ) 02370 { 02371 emit_token("b(\")"); 02372 emit_string(statbuf, s); 02373 emit_token("encode-bytes"); 02374 if( num_encoded ) 02375 emit_token("encode+"); 02376 num_encoded += s; 02377 } 02378 fclose( f ); 02379 tokenization_error ( INFO, "ENCODed %d bytes.\n", num_encoded); 02380 } 02381 } 02382 02383 /* ************************************************************************** 02384 * 02385 * Function name: check_name_length 02386 * Synopsis: If the length of a user-defined name exceeds the 02387 * ANSI-specified maximum of 31 characters, issue 02388 * a message. This is a hard-coded limit. 02389 * Although our Tokenizer can handle longer names, 02390 * they will cause big problems when encountered 02391 * by an FCode interpreter. 02392 * If the name is going to be included in the binary 02393 * output, the message severity must be an ERROR. 02394 * Otherwise, if the name is HEADERLESS, the severity 02395 * can be reduced to a Warning; if the name is only 02396 * defined in "Tokenizer Escape" mode the message 02397 * severity can be further reduced to an Advisory. 02398 * 02399 * Inputs: 02400 * Parameters: 02401 * wlen Length of the newly-created word 02402 * Global Variables: 02403 * in_tokz_esc TRUE if in "Tokenizer Escape" mode. 02404 * Local Static Variables: 02405 * hdr_flag State of headered-ness for name-creation 02406 * 02407 * Outputs: 02408 * Returned Value: NONE 02409 * Global Variables: 02410 * Printout: ERROR message if applicable. 02411 * 02412 * Error Detection: 02413 * The whole point of this routine. 02414 * 02415 * Revision History: 02416 * Updated Wed, 20 Jul 2005 by David L. Paktor 02417 * Escalated from merely an informative warning to a TKERROR 02418 * Updated Fri, 21 Oct 2005 by David L. Paktor 02419 * Adjust severity if name doesn't go into the FCode anyway... 02420 * 02421 **************************************************************************** */ 02422 02423 void check_name_length( signed long wlen ) 02424 { 02425 if ( wlen > 31 ) 02426 { 02427 int severity = TKERROR; 02428 if ( in_tokz_esc ) 02429 { severity = INFO; 02430 }else{ 02431 if (hdr_flag == FLAG_HEADERLESS) 02432 { severity = WARNING; 02433 } 02434 } 02435 tokenization_error( severity, 02436 "ANSI Forth does not permit definition of names " 02437 "longer than 31 characters.\n" ); 02438 } 02439 02440 } 02441 02442 02443 /* ************************************************************************** 02444 * 02445 * Function name: definer_name 02446 * Synopsis: Given a defining-word internal token, return 02447 * a printable string for the definer, for use 02448 * in an error-message. 02449 * 02450 * Inputs: 02451 * Parameters: 02452 * definer Internal token for the defining-word 02453 * reslt_ptr Pointer to string-pointer that takes 02454 * the result, if successful 02455 * 02456 * Outputs: 02457 * Returned Value: TRUE if definer was recognized 02458 * Supplied Pointers: 02459 * *reslt_ptr If successful, points to printable string; 02460 * otherwise, left unchanged. 02461 * 02462 * 02463 **************************************************************************** */ 02464 02465 static bool definer_name(fwtoken definer, char **reslt_ptr) 02466 { 02467 bool retval = TRUE; 02468 switch (definer) 02469 { 02470 case VARIABLE: 02471 *reslt_ptr = "VARIABLE"; 02472 break; 02473 case DEFER: 02474 *reslt_ptr = "DEFER"; 02475 break; 02476 case VALUE: 02477 *reslt_ptr = "VALUE"; 02478 break; 02479 case BUFFER: 02480 *reslt_ptr = "BUFFER"; 02481 break; 02482 case CONST: 02483 *reslt_ptr = "CONSTANT"; 02484 break; 02485 case COLON: 02486 *reslt_ptr = "COLON"; 02487 break; 02488 case CREATE: 02489 *reslt_ptr = "CREATE"; 02490 break; 02491 case FIELD: 02492 *reslt_ptr = "FIELD"; 02493 break; 02494 case MACRO_DEF: 02495 *reslt_ptr = "MACRO"; 02496 break; 02497 case ALIAS: 02498 *reslt_ptr = "ALIAS"; 02499 break; 02500 case LOCAL_VAL: 02501 *reslt_ptr = "Local Value name"; 02502 break; 02503 default: 02504 retval = FALSE; 02505 } 02506 02507 return ( retval); 02508 } 02509 02510 02511 /* ************************************************************************** 02512 * 02513 * Function name: as_a_what 02514 * Synopsis: Add the phrase "as a[n] <DEF'N_TYPE>" for the given 02515 * definition-type to the given string buffer. 02516 * 02517 * Inputs: 02518 * Parameters: 02519 * definer Internal token for the defining-word 02520 * as_what The string buffer to which to add. 02521 * 02522 * Outputs: 02523 * Returned Value: TRUE if an assigned name was found 02524 * for the given definer and text 02525 * was added to the buffer. 02526 * Supplied Pointers: 02527 * *as_what Text is added to this buffer. 02528 * 02529 * Process Explanation: 02530 * The calling routine is responsible to make sure the size of 02531 * the buffer is adequate. Allow 25 for this routine. 02532 * The added text will not have spaces before or after; if any 02533 * are needed, they, too, are the responsibility of the 02534 * calling routine. The return value gives a helpful clue. 02535 * 02536 **************************************************************************** */ 02537 02538 bool as_a_what( fwtoken definer, char *as_what) 02539 { 02540 char *defn_type_name; 02541 bool retval = definer_name(definer, &defn_type_name); 02542 if ( retval ) 02543 { 02544 strcat( as_what, "as a"); 02545 /* Handle article preceding definer name 02546 * that starts with a vowel. 02547 */ 02548 /* HACK: Only one definer name -- ALIAS -- 02549 * begins with a vowel. Take advantage 02550 * of that... 02551 * Otherwise, we'd need to do something involving 02552 * strchr( "AEIOU", defn_type_name[0] ) 02553 */ 02554 if ( definer == ALIAS ) strcat( as_what, "n" ); 02555 02556 strcat( as_what, " "); 02557 strcat( as_what, defn_type_name); 02558 } 02559 return( retval); 02560 } 02561 02562 02563 /* ************************************************************************** 02564 * 02565 * Function name: lookup_word 02566 * Synopsis: Find the TIC-entry for the given word in the Current 02567 * mode -- relative to "Tokenizer-Escape" -- and 02568 * Scope into which definitions are being entered. 02569 * Optionally, prepare text for various Message types. 02570 * 02571 * Inputs: 02572 * Parameters: 02573 * stat_name Word to look up 02574 * where_pt1 Pointer to result-display string, part 1 02575 * NULL if not preparing text 02576 * where_pt2 Pointer to result-display string, part 2 02577 * NULL if not preparing text 02578 * Global Variables: 02579 * in_tokz_esc TRUE if in "Tokenizer Escape" mode. 02580 * scope_is_global TRUE if "global" scope is in effect 02581 * current_device_node Current dev-node data-struct 02582 * ibm_locals TRUE if IBM-style Locals are enabled 02583 * 02584 * Outputs: 02585 * Returned Value: Pointer to TIC-entry; NULL if not found 02586 * Supplied Pointers: 02587 * *where_pt1 Result display string, part 1 of 2 02588 * *where_pt2 Result display string, part 2 of 2 02589 * 02590 * Process Explanation: 02591 * We will set the two-part result-display string in this routine 02592 * because only here do we know in which vocabulary the word 02593 * was found. 02594 * Pre-load the two parts of the result-display string. 02595 * If we are in "Tokenizer Escape" mode, look up the word: first, 02596 * in the "Tokenizer Escape" Vocabulary, or, if not found, 02597 * among the "Shared" words. 02598 * Otherwise, we're in Normal" mode. Look it up: first, among the 02599 * Locals, if IBM-style Locals are enabled (it can possibly be 02600 * one if "Tokenizer Escape" mode was entered during a colon- 02601 * -definition); then, if it was not found and if "Device" 02602 * scope is in effect, look in the current device-node; then, 02603 * if not found, in the "core" vocabulary. 02604 * Load the second part of the result-display string with the 02605 * appropriate phrase for whereever it was found. 02606 * Then adjust the first part of the result-display string with 02607 * the definer, if known. 02608 * 02609 * The two strings will be formatted to be printed adjacently, 02610 * without any additional spaces in the printf() format. 02611 * The first part of the result-display string will not start with 02612 * a space, but will have an intermediate space if necessary. 02613 * The second part of the result-display string will not start 02614 * with a space, and will contain the terminating new-line 02615 * if appropriate. It might or might not have been built 02616 * with a call to in_what_node(). 02617 * 02618 * If the calling routine displays the result-display strings, 02619 * it should follow-up with a call to show_node_start() 02620 * This will be harmless if in_what_node() was not used 02621 * in the construction of the result-display string. 02622 * If the calling routine is NOT going to display the result strings, 02623 * it should pass NULLs for the string-pointer pointers. 02624 * 02625 * The second part of the string consists of pre-coded phrases; 02626 * therefore, we can directly assign the pointer. 02627 * The first part of the string, however, has developed into 02628 * something constructed "on the fly". Earlier, it, too, 02629 * had been a directly-assignable pointer; all the callers 02630 * to this routine expect that. Rather than change all the 02631 * callers, we will assign a local buffer for it. 02632 * 02633 * Extraneous Remarks: 02634 * We had to add the rule allowing where_pt1 or where_pt2 to be 02635 * NULL after we introduced the in_what_node() function. 02636 * We had cases where residue from a lookup for processing 02637 * showed up later in an unrelated Message. The NULL rule 02638 * should prevent that. 02639 * 02640 **************************************************************************** */ 02641 02642 static char lookup_where_pt1_buf[32]; 02643 02644 tic_hdr_t *lookup_word( char *stat_name, char **where_pt1, char **where_pt2 ) 02645 { 02646 tic_hdr_t *found = NULL; 02647 bool trail_space = TRUE; 02648 bool doing_lookup = BOOLVAL( ( where_pt1 != NULL ) 02649 && ( where_pt2 != NULL ) ); 02650 char *temp_where_pt2 = "in the core vocabulary.\n"; 02651 02652 lookup_where_pt1_buf[0] = 0; /* Init'lz part-1 buffer */ 02653 02654 /* "Core vocab" refers both to shared fwords and built-in tokens. */ 02655 02656 /* Distinguish between "Normal" and "Tokenizer Escape" mode */ 02657 if ( in_tokz_esc ) 02658 { /* "Tokenizer Escape" mode. */ 02659 found = lookup_tokz_esc( stat_name); 02660 if ( found != NULL ) 02661 { 02662 temp_where_pt2 = in_tkz_esc_mode; 02663 }else{ 02664 /* "Core vocabulary". */ 02665 found = lookup_shared_word( stat_name); 02666 } 02667 }else{ 02668 /* "Normal" tokenization mode */ 02669 if ( ibm_locals ) 02670 { 02671 found = lookup_local( stat_name); 02672 if ( doing_lookup && ( found != NULL ) ) 02673 { 02674 trail_space = FALSE; 02675 temp_where_pt2 = ".\n"; 02676 } 02677 } 02678 02679 if ( found == NULL ) 02680 { 02681 found = lookup_in_dev_node( stat_name); 02682 if ( found != NULL ) 02683 { 02684 if ( doing_lookup ) 02685 { 02686 temp_where_pt2 = in_what_node( current_device_node); 02687 } 02688 }else{ 02689 /* "Core vocabulary". */ 02690 found = lookup_core_word( stat_name); 02691 } 02692 } 02693 } 02694 02695 if ( ( doing_lookup ) && ( found != NULL ) ) 02696 { 02697 if ( as_a_what( found->fword_defr, lookup_where_pt1_buf) ) 02698 { 02699 if ( trail_space ) 02700 { 02701 strcat(lookup_where_pt1_buf, " "); 02702 } 02703 } 02704 *where_pt1 = lookup_where_pt1_buf; 02705 *where_pt2 = temp_where_pt2; 02706 } 02707 return( found); 02708 } 02709 02710 /* ************************************************************************** 02711 * 02712 * Function name: word_exists 02713 * Synopsis: Check whether a given word is already defined in the 02714 * Current mode -- relative to "Tokenizer-Escape" -- 02715 * and Scope into which definitions are being entered. 02716 * Used for error-reporting. 02717 * 02718 * Inputs: 02719 * Parameters: 02720 * stat_name Word to look up 02721 * where_pt1 Pointer to string, part 1 of 2, 02722 * to display in result 02723 * where_pt2 Pointer to string, part 2 of 2, 02724 * to display in result 02725 * 02726 * Outputs: 02727 * Returned Value: TRUE if the name exists. 02728 * Supplied Pointers: 02729 * *where_pt1 Result display string, part 1 of 2 02730 * *where_pt2 Result display string, part 2 of 2 02731 * 02732 * Process Explanation: 02733 * If the calling routine displays the result-display strings, 02734 * it should follow-up with a call to show_node_start() 02735 * 02736 * Extraneous Remarks: 02737 * This used to be a much heftier routine; now it's just 02738 * a wrapper around lookup_word() . 02739 * 02740 **************************************************************************** */ 02741 02742 bool word_exists( char *stat_name, char **where_pt1, char **where_pt2 ) 02743 { 02744 bool retval = FALSE; 02745 tic_hdr_t *found = lookup_word( stat_name, where_pt1, where_pt2 ); 02746 02747 if ( found != NULL ) 02748 { 02749 retval = TRUE; 02750 } 02751 02752 return( retval); 02753 } 02754 02755 /* ************************************************************************** 02756 * 02757 * Function name: warn_if_duplicate 02758 * Synopsis: Check whether a given word is already defined in 02759 * the current mode and issue a warning if it is. 02760 * 02761 * Inputs: 02762 * Parameters: 02763 * stat_name Word to check 02764 * Global Variables: 02765 * verbose_dup_warning Whether to run the check at all. 02766 * Local Static Variables: 02767 * do_not_overload FALSE if OVERLOAD is in effect. 02768 * 02769 * Outputs: 02770 * Returned Value: NONE 02771 * Local Static Variables: 02772 * do_not_overload Restored to TRUE 02773 * Printout: 02774 * Warning message if a duplicate. 02775 * 02776 * Error Detection: 02777 * None. This is merely an informative warning. 02778 * 02779 * Process Explanation: 02780 * "Current mode" -- meaning, whether the tokenizer is operating 02781 * in "Tokenizer Escape" mode or in normal tokenization mode -- 02782 * will be recognized by the word_exists() function. 02783 * 02784 * Extraneous Remarks: 02785 * The OVERLOAD directive is our best shot at creating a more 02786 * fine-grained way to temporarily bypass this test when 02787 * deliberately overloading a name. It would be nice to have 02788 * a mechanism, comparable to the classic 02789 * WARNING @ WARNING OFF ..... WARNING ! 02790 * that could be applied to a range of definitions, but: 02791 * (1) That would require more of a true FORTH infrastructure; 02792 * hence, more effort than I am willing to invest, at 02793 * this juncture, for such a small return, 02794 * and 02795 * (2) Most intentional-overloading ranges only cover a 02796 * single definition anyway. 02797 * 02798 **************************************************************************** */ 02799 02800 void warn_if_duplicate( char *stat_name) 02801 { 02802 if ( verbose_dup_warning && do_not_overload ) 02803 { 02804 char *where_pt1; 02805 char *where_pt2; 02806 if ( word_exists( stat_name, &where_pt1, &where_pt2) ) 02807 { 02808 tokenization_error( WARNING, 02809 "Duplicate definition: %s already exists %s%s", 02810 stat_name, where_pt1, where_pt2 ); 02811 show_node_start(); 02812 } 02813 } 02814 do_not_overload = TRUE; 02815 } 02816 02817 02818 /* ************************************************************************** 02819 * 02820 * Function name: glob_not_allowed 02821 * Synopsis: Print a Message that "XXX is not allowed." 02822 * because Global Scope is in effect. 02823 * Used from several places... 02824 * 02825 * Inputs: 02826 * Parameters: 02827 * severity Severity of the Message 02828 * not_ignoring FALSE = "Ignoring", for the part of the 02829 * message about "How It's being Handled" 02830 * Global Variables: 02831 * statbuf Disallowed word currently being processed 02832 * 02833 * Outputs: 02834 * Returned Value: NONE 02835 * Printout: Message of given severity. 02836 * 02837 **************************************************************************** */ 02838 02839 static void glob_not_allowed( int severity, bool not_ignoring) 02840 { 02841 tokenization_error( severity, "Global Scope is in effect; " 02842 "%s not allowed. %s.\n", 02843 strupr(statbuf), 02844 not_ignoring ? 02845 "Attempting to compensate.." : 02846 "Ignoring" ); 02847 } 02848 02849 02850 /* ************************************************************************** 02851 * 02852 * Function name: not_in_dict 02853 * Synopsis: Print the message "XXX is not in dictionary." 02854 * Used from several places... 02855 * 02856 * Inputs: 02857 * Parameters: 02858 * stat_name Word that could not be processed 02859 * 02860 * Outputs: 02861 * Returned Value: NONE 02862 * Printout: Error message. 02863 * 02864 **************************************************************************** */ 02865 02866 static void not_in_dict( char *stat_name) 02867 { 02868 tokenization_error ( TKERROR, 02869 "Word %s is not in dictionary.\n", stat_name); 02870 } 02871 02872 /* ************************************************************************** 02873 * 02874 * Function name: tokenized_word_error 02875 * Synopsis: Report an error when a word could not be processed 02876 * by the tokenizer. Messages will vary... 02877 * 02878 * Inputs: 02879 * Parameters: 02880 * stat_name Word that could not be processed 02881 * Global Variables: 02882 * in_tokz_esc TRUE if tokenizer is in "Tokenizer Escape" mode. 02883 * 02884 * Outputs: 02885 * Returned Value: NONE 02886 * Printout: Error message. Possible Advisory about 02887 * 02888 * Error Detection: 02889 * Error was detected by the calling routine... 02890 * 02891 * Process Explanation: 02892 * If the tokenizer is in "Tokenizer Escape" mode, the word might 02893 * be one that can be used in normal tokenization mode; 02894 * Conversely, if the tokenizer is in normal-tokenization mode, 02895 * the word might be one that can be used in the "Escape" mode. 02896 * Or, the word is completely unknown. 02897 * Recognizing the current mode is handled by word_exists() 02898 * However, we need to test for the *converse* of the current mode, 02899 * so before we call word_exists() we are going to save and 02900 * invert the setting of in_tokz_esc (and afterwards, of 02901 * course, restore it...) 02902 * 02903 **************************************************************************** */ 02904 02905 static void tokenized_word_error( char *stat_name) 02906 { 02907 char *where_pt1; 02908 char *where_pt2; 02909 bool found_somewhere; 02910 02911 bool sav_in_tokz_esc = in_tokz_esc; 02912 in_tokz_esc = INVERSE(sav_in_tokz_esc); 02913 02914 found_somewhere = word_exists( stat_name, &where_pt1, &where_pt2); 02915 if ( found_somewhere ) 02916 { 02917 tokenization_error ( TKERROR, "The word %s is %s recognized " 02918 "in tokenizer-escape mode.\n", 02919 stat_name, sav_in_tokz_esc ? "not" : "only" ); 02920 } else { 02921 not_in_dict( stat_name); 02922 } 02923 02924 if ( INVERSE(exists_in_ancestor( stat_name)) ) 02925 { 02926 if ( found_somewhere && sav_in_tokz_esc ) 02927 { 02928 tokenization_error(INFO, 02929 "%s is defined %s%s", stat_name, where_pt1, where_pt2 ); 02930 show_node_start(); 02931 } 02932 } 02933 02934 in_tokz_esc = sav_in_tokz_esc; 02935 } 02936 02937 02938 /* ************************************************************************** 02939 * 02940 * Function name: unresolved_instance 02941 * Synopsis: Print the "unresolved instance" message 02942 * 02943 * Inputs: 02944 * Parameters: 02945 * severity Severity of the Message 02946 * Local Static Variables: 02947 * instance_filename File where "instance" invoked 02948 * instance_lineno Line number where "instance" invoked 02949 * 02950 * Outputs: 02951 * Returned Value: NONE 02952 * Printout: Message. 02953 * 02954 * Error Detection: 02955 * Error was detected by the calling routine... 02956 * 02957 **************************************************************************** */ 02958 02959 static void unresolved_instance( int severity) 02960 { 02961 tokenization_error( severity, "Unresolved \"INSTANCE\"" ); 02962 just_where_started( instance_filename, instance_lineno ); 02963 } 02964 02965 02966 /* ************************************************************************** 02967 * 02968 * Function name: modified_by_instance 02969 * Synopsis: Print the "[not] modified by instance" message 02970 * 02971 * Inputs: 02972 * Parameters: 02973 * definer Internal token for the defining-word 02974 * was_modded FALSE if "not modified..." 02975 * Local Static Variables: 02976 * instance_filename File where "instance" invoked 02977 * instance_lineno Line number where "instance" invoked 02978 * 02979 * Outputs: 02980 * Returned Value: NONE 02981 * Printout: WARNING message. 02982 * 02983 * Error Detection: 02984 * Error was detected by the calling routine... 02985 * 02986 **************************************************************************** */ 02987 02988 static void modified_by_instance( fwtoken definer, bool was_modded) 02989 { 02990 char *was_not = was_modded ? "was" : "not" ; 02991 char *defn_type_name; 02992 02993 /* No need to check the return value */ 02994 definer_name(definer, &defn_type_name); 02995 02996 tokenization_error ( WARNING, 02997 "%s definition %s modified by \"INSTANCE\"", 02998 defn_type_name, was_not ); 02999 just_where_started( instance_filename, instance_lineno ); 03000 } 03001 03002 /* ************************************************************************** 03003 * 03004 * Function name: validate_instance 03005 * Synopsis: If "instance" is in effect, check whether it is 03006 * appropriate to the defining-word being called. 03007 * 03008 * Inputs: 03009 * Parameters: 03010 * definer Internal token for the defining-word 03011 * Local Static Variables: 03012 * is_instance TRUE if "instance" is in effect. 03013 * instance_definer_gap TRUE if invalid definer(s) invoked 03014 * since "instance" went into effect. 03015 * 03016 * Outputs: 03017 * Returned Value: NONE 03018 * Local Static Variables: 03019 * is_instance Reset to FALSE if definer was valid. 03020 * instance_definer_gap TRUE if definer was not valid; 03021 * FALSE if definer was valid. 03022 * 03023 * Error Detection: 03024 * If "instance" is in effect, the only defining-words that are 03025 * valid are: value variable defer or buffer: Attempts 03026 * to use any other defining-word will be reported with a 03027 * WARNING, but "instance" will remain in effect. 03028 * If an invalid defining-word was invoked since "instance" went 03029 * into effect, then, when it is finally applied to a valid 03030 * definer, issue a WARNING. 03031 * 03032 * Process Explanation: 03033 * Implicit in the Standard is the notion that, once INSTANCE has 03034 * been executed, it remains in effect until a valid defining- 03035 * word is encountered. We will do the same. 03036 * 03037 **************************************************************************** */ 03038 03039 static void validate_instance(fwtoken definer) 03040 { 03041 if ( is_instance ) 03042 { 03043 bool is_error = TRUE ; 03044 03045 switch ( definer) 03046 { 03047 case VALUE: 03048 case VARIABLE: 03049 case DEFER: 03050 case BUFFER: 03051 is_error = FALSE; 03052 /* No default needed, likewise, no breaks; */ 03053 /* but some compilers get upset without 'em... */ 03054 default: 03055 break; 03056 } 03057 03058 if( is_error ) 03059 { 03060 modified_by_instance(definer, FALSE ); 03061 instance_definer_gap = TRUE; 03062 }else{ 03063 if ( instance_definer_gap ) 03064 { 03065 modified_by_instance(definer, TRUE ); 03066 } 03067 is_instance = FALSE; 03068 instance_definer_gap = FALSE; 03069 } 03070 } 03071 } 03072 03073 03074 /* ************************************************************************** 03075 * 03076 * Function name: trace_creation 03077 * Synopsis: If the word being created is on the Trace List, 03078 * display the appropriate message 03079 * 03080 * Inputs: 03081 * Parameters: 03082 * definer Internal token for the defining-word 03083 * nu_name The word being created 03084 * Global Variables: 03085 * verbose No point in doing all this if we're 03086 * not showing the message anyway... 03087 * in_tokz_esc TRUE if we are in Tokenizer-Escape mode 03088 * scope_is_global TRUE if "global" scope is in effect 03089 * current_device_node Current dev-node data-struct 03090 * 03091 * Outputs: 03092 * Returned Value: NONE 03093 * Printout: 03094 * Advisory Message, if the word is on the Trace List. 03095 * 03096 * Process Explanation: 03097 * The order of scope-checking is important: 03098 * A Local has no scope beyond the definition in which it occurs. 03099 * Tokenizer-Escape mode supercedes "Normal" mode, and renders 03100 * moot the differences between Global and Device scope. 03101 * Global scope is mutually exclusive with Device scope. 03102 * Device scope needs to identify where the Current device-node 03103 * began. 03104 * 03105 **************************************************************************** */ 03106 03107 void trace_creation( fwtoken definer, char *nu_name) 03108 { 03109 if ( verbose ) 03110 { 03111 if ( is_on_trace_list( nu_name) ) 03112 { 03113 char as_what[96] = ""; 03114 bool show_last_colon = BOOLVAL( definer == LOCAL_VAL); 03115 03116 as_a_what( definer, as_what); /* No need to check return value. */ 03117 03118 /* Scope-checking starts here, unless show_last_colon is TRUE. 03119 * Come out of this with as_what[] filled up and 03120 * terminated with a new-line, if appropriate, 03121 */ 03122 while ( ! show_last_colon ) 03123 { 03124 strcat( as_what, " "); 03125 03126 if ( in_tokz_esc ) 03127 { 03128 strcat( as_what, in_tkz_esc_mode); 03129 break; 03130 } 03131 03132 if ( scope_is_global ) 03133 { 03134 strcat( as_what, "with Global scope.\n"); 03135 }else{ 03136 /* In Device scope. Show the Current node. */ 03137 strcat( as_what, in_what_node( current_device_node)); 03138 } 03139 break; 03140 03141 } /* Destination of BREAKs ... */ 03142 03143 tokenization_error(INFO, "Creating %s %s", nu_name, as_what); 03144 03145 if ( show_last_colon ) 03146 { 03147 in_last_colon(); 03148 }else{ 03149 show_node_start(); 03150 } 03151 03152 } 03153 } 03154 } 03155 03156 /* ************************************************************************** 03157 * 03158 * Function name: create_word 03159 * Synopsis: 03160 * 03161 * Inputs: 03162 * Parameters: 03163 * definer Internal token for the defining-word 03164 * Global Variables: 03165 * control_stack_depth Number of "Control Stack" entries in effect 03166 * nextfcode FCode-number to be assigned to the new name 03167 * statbuf Symbol last read from the input stream 03168 * pc Input-source Scanning pointer 03169 * hdr_flag State of headered-ness for name-creation 03170 * force_tokens_case If TRUE, force token-names' case in FCode 03171 * force_lower_case_tokens 03172 * If force_tokens_case is TRUE, this 03173 * determines which case to force 03174 * iname Input-source file name; for error-reporting 03175 * lineno Input-source Line number; also for err-rep't 03176 * 03177 * Outputs: 03178 * Returned Value: TRUE if successful 03179 * Global Variables: 03180 * nextfcode Incremented (by bump_fcode() ) 03181 * statbuf Advanced to next symbol; must be re-read 03182 * pc Advanced, then restored to previous value 03183 * Memory Allocated 03184 * Copy of the name being defined, by support routine. 03185 * Copy of input-source file name, for error-reporting 03186 * When Freed? 03187 * Copy of name being defined is freed when Current Device Vocab 03188 * is "finished", or at end of tokenization. 03189 * Copy of input-source file name is freed at end of this routine. 03190 * 03191 * Error Detection: 03192 * ERROR if already inside a colon-definition. Discontinue 03193 * processing and return FALSE. 03194 * ERROR if inside a control-structure. Continue processing, 03195 * though, to catch other errors, and even return TRUE; 03196 * except: leave the new token undefined. 03197 * Warning on duplicate name (subject to command-line control) 03198 * Message if name is excessively long; Warning if headerless. 03199 * FATAL if the value of nextfcode is larger than the legal 03200 * maximum for an FCode, (0x0fff). 03201 * 03202 * Revision History: 03203 * Updated Thu, 24 Mar 2005 by David L. Paktor 03204 * Optional warning when name about to be created is a 03205 * duplicate of an existing name. 03206 * Updated Wed, 30 Mar 2005 by David L. Paktor 03207 * Warning when name length exceeds ANSI-specified max (31 chars). 03208 * Updated Tue, 05 Apr 2005 by David L. Paktor 03209 * Add "definer" parameter and call to add_definer() . Part 03210 * of the mechanism to forbid attempts to use the TO 03211 * directive to change values of CONSTANTs in particular 03212 * and of inappropriate targets in general. 03213 * Updated Fri, 06 May 2005 by David L. Paktor 03214 * Error-detection of DO ... LOOP and BEGIN ... imbalance 03215 * Error-detection of nextfcode exceeding legal maximum (0x0fff). 03216 * Updated Wed, 20 Jul 2005 by David L. Paktor 03217 * Put Duplicate-Name-Test under command-line control... 03218 * Updated Wed, 24 Aug 2005 by David L. Paktor 03219 * Error-detection via clear_control_structs() routine. 03220 * Updated Tue, 10 Jan 2006 by David L. Paktor 03221 * Convert to tic_hdr_t type vocabulary. 03222 * Updated Thu, 20 Apr 2006 by David L. Paktor 03223 * Allow creation of new definition within body of a flow-control 03224 * structure. (Remove error-detection via clear_control_structs) 03225 * Updated Tue, 13 Jun 2006 by David L. Paktor 03226 * Move detection of out-of-bounds nextfcode to assigning_fcode() 03227 * routine, which also detects Overlapping Ranges error. 03228 * Updated Thu, 27 Jun 2006 by David L. Paktor 03229 * Report Error for attempt to create def'n inside control structure. 03230 * 03231 * Extraneous Remarks: 03232 * We must not set incolon to TRUE (if we are creating a colon 03233 * definition) until *AFTER* this routine has been called, due 03234 * to the initial error-checking. If we need to detect whether 03235 * we are creating a colon definition, we can do so by testing 03236 * whether the parameter, DEFINER, equals COLON . 03237 * 03238 **************************************************************************** */ 03239 03240 static bool create_word(fwtoken definer) 03241 { 03242 signed long wlen; 03243 bool retval = FALSE; 03244 char *defn_type_name; 03245 03246 /* If already inside a colon, ERROR and discontinueprocessing */ 03247 /* If an alias to a definer is used, show the name of the alias */ 03248 if ( test_in_colon(statbuf, FALSE, TKERROR, NULL) ) 03249 { 03250 char defn_type_buffr[32] = ""; 03251 unsigned int old_lineno = lineno; /* For error message */ 03252 bool define_token = TRUE; 03253 03254 { /* Set up definition-type text for error-message */ 03255 03256 /* No need to check the return value */ 03257 definer_name(definer, &defn_type_name); 03258 03259 strcat( defn_type_buffr, defn_type_name); 03260 strcat( defn_type_buffr, " definition"); 03261 } 03262 /* If in a control-structure, ERROR but continue processing */ 03263 if ( control_stack_depth != 0 ) 03264 { 03265 announce_control_structs( TKERROR, defn_type_buffr, 0); 03266 /* Leave the new token undefined. */ 03267 define_token = FALSE; 03268 } 03269 03270 /* Get the name of the new token */ 03271 wlen = get_word(); 03272 03273 #ifdef DEBUG_SCANNER 03274 printf("%s:%d: debug: defined new word %s, fcode no 0x%x\n", 03275 iname, lineno, name, nextfcode); 03276 #endif 03277 if ( wlen <= 0 ) 03278 { 03279 warn_unterm( TKERROR, defn_type_buffr, old_lineno); 03280 }else{ 03281 bool emit_token_name = TRUE; 03282 03283 /* Handle Tracing of new definitions */ 03284 trace_creation( definer, statbuf); 03285 03286 /* Other Error or Warnings as applicable */ 03287 validate_instance( definer); 03288 warn_if_duplicate( statbuf); 03289 check_name_length( wlen); 03290 03291 /* Bump FCode; error-check as applicable */ 03292 assigning_fcode(); 03293 03294 /* Define the new token, unless disallowed */ 03295 add_to_current( statbuf, nextfcode, definer, define_token); 03296 03297 /* Emit appropriate FCodes: Type of def'n, */ 03298 switch ( hdr_flag ) 03299 { 03300 case FLAG_HEADERS: 03301 emit_token("named-token"); 03302 break; 03303 03304 case FLAG_EXTERNAL: 03305 emit_token("external-token"); 03306 break; 03307 03308 default: /* FLAG_HEADERLESS */ 03309 emit_token("new-token"); 03310 emit_token_name = FALSE; 03311 } 03312 03313 /* Emit name of token, if applicable */ 03314 if ( emit_token_name ) 03315 { 03316 if ( force_tokens_case ) 03317 { 03318 if ( force_lower_case_tokens ) 03319 { 03320 strlwr( statbuf); 03321 }else{ 03322 strupr( statbuf); 03323 } 03324 } 03325 emit_string((u8 *)statbuf, wlen); 03326 } 03327 03328 /* Emit the new token's FCode */ 03329 emit_fcode(nextfcode); 03330 03331 /* Prepare FCode Assignment Counter for next definition */ 03332 bump_fcode(); 03333 03334 /* Declare victory */ 03335 retval = TRUE; 03336 } 03337 } 03338 return( retval); 03339 } 03340 03341 03342 /* ************************************************************************** 03343 * 03344 * Function name: cannot_apply 03345 * Synopsis: Print error message of the form: 03346 * "Cannot apply <func> to <targ>, which is a <def'n>" 03347 * 03348 * Inputs: 03349 * Parameters: 03350 * func_nam The name of the function 03351 * targ_nam The name of the target 03352 * defr The numeric-code of the definer-type 03353 * 03354 * Outputs: 03355 * Returned Value: NONE 03356 * Printout: 03357 * The error message is the entire printout of this routine 03358 * 03359 * Error Detection: 03360 * Error was detected by calling routine 03361 * 03362 * Process Explanation: 03363 * The calling routine already looked up the definer for its 03364 * own purposes, so we don't need to do that again here. 03365 * 03366 * Still to be done: 03367 * If the definer-name is not found, we might still look up 03368 * the target name in the various vocabularies and use 03369 * a phrase for those. E.g., if it is a valid token, 03370 * we could say it's defined as a "primitive". (I'm 03371 * not sure what we'd say about an FWord...) 03372 * 03373 **************************************************************************** */ 03374 03375 static void cannot_apply( char *func_nam, char *targ_nam, fwtoken defr) 03376 { 03377 char *defr_name = "" ; 03378 const char *defr_phrase = ", which is defined as a " ; 03379 03380 if ( ! definer_name(defr, &defr_name) ) 03381 { 03382 defr_phrase = ""; 03383 } 03384 03385 tokenization_error ( TKERROR , 03386 "Cannot apply %s to %s %s%s.\n", 03387 func_nam, targ_nam, defr_phrase, defr_name ); 03388 03389 } 03390 03391 03392 /* ************************************************************************** 03393 * 03394 * Function name: lookup_with_definer 03395 * Synopsis: Return pointer to data-structure of named word, 03396 * if it's valid in Current context, and supply its 03397 * definer. If it's not valid in Current context, 03398 * see if it might be a Local, and supply that definer. 03399 * 03400 * Inputs: 03401 * Parameters: 03402 * stat_name Name to look up 03403 * *definr Pointer to place to put the definer. 03404 * 03405 * Outputs: 03406 * Returned Value: Pointer to data-structure, or 03407 * NULL if not in Current context. 03408 * Supplied Pointers: 03409 * *definr Definer; possibly LOCAL_VAL 03410 * 03411 * Process Explanation: 03412 * If the name is not found in the Current context, and does not 03413 * exist as a Local, *definr will remain unchanged. 03414 * 03415 * Extraneous Remarks: 03416 * This is an odd duck^H^H^H^H^H^H^H^H^H^H^H^H a highly-specialized 03417 * routine created to meet some corner-case needs engendered by 03418 * the conversion to tic_hdr_t vocabularies all around, combined 03419 * with an obsessive urge to preserve a high level of detail in 03420 * our error-messages. 03421 * 03422 **************************************************************************** */ 03423 03424 static tic_hdr_t *lookup_with_definer( char *stat_name, fwtoken *definr ) 03425 { 03426 tic_hdr_t *retval = lookup_current( stat_name); 03427 if ( retval != NULL ) 03428 { 03429 *definr = retval->fword_defr; 03430 }else{ 03431 if ( exists_as_local( stat_name) ) *definr = LOCAL_VAL; 03432 } 03433 return ( retval ); 03434 } 03435 03436 /* ************************************************************************** 03437 * 03438 * Function name: validate_to_target 03439 * Synopsis: Print a message if the intended target 03440 * of the TO directive is not valid 03441 * 03442 * Inputs: 03443 * Parameters: NONE 03444 * Global Variables: 03445 * statbuf Next symbol to be read from the input stream 03446 * pc Input-source Scanning pointer 03447 * 03448 * Outputs: 03449 * Returned Value: TRUE = Allow b(to) token to be output. 03450 * Global Variables: 03451 * statbuf Advanced to next symbol; must be re-read 03452 * pc Advanced, then restored to previous value 03453 * 03454 * Error Detection: 03455 * If next symbol is not a valid target of TO , issue ERROR 03456 * message. Restored pc will cause the next symbol to 03457 * be processed by ordinary means. 03458 * Allow b(to) token to be output in selected cases. Even if 03459 * user has set the "Ignore Errors" flag, certain targets are 03460 * still too risky to be allowed to follow a b(to) token; 03461 * if "Ignore Errors" is not set, output won't get created 03462 * anyway. 03463 * Issue ERROR in the extremely unlikely case that "to" is the 03464 * last word in the Source. 03465 * 03466 * Process Explanation: 03467 * Valid targets for a TO directive are words defined by: 03468 * DEFER, VALUE and arguably VARIABLE. We will also allow 03469 * CONSTANT, but will still issue an Error message. 03470 * After the check, restore pc ; this was only a look-ahead. 03471 * Also restore lineno and abs_token_no 03472 * 03473 * Extraneous Remarks: 03474 * Main part of the mechanism to detect attempts to use the TO 03475 * directive to change the values of CONSTANTs in particular 03476 * and of inappropriate targets in general. 03477 * 03478 **************************************************************************** */ 03479 03480 static bool validate_to_target( void ) 03481 { 03482 signed long wlen; 03483 tic_hdr_t *test_entry; 03484 u8 *saved_pc = pc; 03485 char *cmd_cpy = strupr( strdup( statbuf)); /* For error message */ 03486 unsigned int saved_lineno = lineno; 03487 unsigned int saved_abs_token_no = abs_token_no; 03488 fwtoken defr = UNSPECIFIED ; 03489 bool targ_err = TRUE ; 03490 bool retval = FALSE ; 03491 03492 wlen = get_word(); 03493 if ( wlen <= 0 ) 03494 { 03495 warn_unterm( TKERROR, cmd_cpy, saved_lineno); 03496 }else{ 03497 03498 test_entry = lookup_with_definer( statbuf, &defr); 03499 if ( test_entry != NULL ) 03500 { 03501 switch (defr) 03502 { 03503 case VARIABLE: 03504 tokenization_error( WARNING, 03505 "Applying %s to a VARIABLE (%s) is " 03506 "not recommended; use ! instead.\n", 03507 cmd_cpy, statbuf); 03508 case DEFER: 03509 case VALUE: 03510 targ_err = FALSE ; 03511 case CONST: 03512 retval = TRUE ; 03513 /* No default needed, likewise, no breaks; */ 03514 /* but some compilers get upset without 'em... */ 03515 default: 03516 break; 03517 } 03518 } 03519 03520 if ( targ_err ) 03521 { 03522 cannot_apply(cmd_cpy, strupr(statbuf), defr ); 03523 } 03524 03525 pc = saved_pc; 03526 lineno = saved_lineno; 03527 abs_token_no = saved_abs_token_no; 03528 } 03529 free( cmd_cpy); 03530 return( retval); 03531 } 03532 03533 03534 /* ************************************************************************** 03535 * 03536 * Function name: you_are_here 03537 * Synopsis: Display a generic Advisory of the Source command 03538 * or directive encountered and being processed 03539 * 03540 * Inputs: 03541 * Parameters: NONE 03542 * Global Variables: 03543 * statbuf The command being processed 03544 * 03545 * Outputs: 03546 * Returned Value: NONE 03547 * Printout: 03548 * Advisory message 03549 * 03550 **************************************************************************** */ 03551 03552 static void you_are_here( void) 03553 { 03554 tokenization_error( INFO, 03555 "%s encountered; processing...\n", 03556 strupr(statbuf) ); 03557 } 03558 03559 03560 /* ************************************************************************** 03561 * 03562 * Function name: fcode_starter 03563 * Synopsis: Respond to one of the "FCode Starter" words 03564 * 03565 * Inputs: 03566 * Parameters: 03567 * token_name The FCode-token for this "Starter" word 03568 * spread The separation between tokens. 03569 * is_offs16 Whether we are using a 16-bit number 03570 * for branch- (and suchlike) -offsets, 03571 * or the older-style 8-bit offset numbers. 03572 * Global Variables: 03573 * iname Input-File name, used to set ifile_name 03574 * field of current_device_node 03575 * lineno Current Input line number, used to set 03576 * line_no field of current_device_node 03577 * Local Static Variables: 03578 * fcode_started If this is TRUE, we have an Error. 03579 * first_fc_starter Control calling reset_fcode_ranges() ; 03580 * only on the first fcode_starter of 03581 * a tokenization. 03582 * 03583 * Outputs: 03584 * Returned Value: NONE 03585 * Global Variables: 03586 * offs16 Global "16-bit-offsets" flag 03587 * current_device_node The ifile_name and line_no fields will be 03588 * loaded with the current input file name 03589 * and line number. This node will be the 03590 * top-level device-node. 03591 * FCode Ranges will be reset the first time per tokenization 03592 * that this routine is entered. 03593 * A new FCode Range will be started every time after that. 03594 * Local Static Variables: 03595 * fcode_started Set to TRUE. We invoke the starter only 03596 * once per image-block. 03597 * first_fc_starter Reset to FALSE if not already 03598 * Memory Allocated 03599 * Duplicate of Input-File name 03600 * When Freed? 03601 * In fcode_ender() 03602 * 03603 * Error Detection: 03604 * Spread of other than 1 -- Warning message. 03605 * "FCode Starter" previously encountered -- Warning and ignore. 03606 * 03607 * Question under consideration: 03608 * Do we want directives -- such as definitions of constants -- 03609 * supplied before the "FCode Starter", to be considered as 03610 * taking place in "Tokenizer Escape" mode? That would mean 03611 * the "Starter" functions must be recognized in "Tokenizer 03612 * Escape" mode. Many ramifications to be thought through... 03613 * I think I'm coming down strongly on the side of "No". The user 03614 * who wants to do that can very well invoke "Tokenizer Escape" 03615 * mode explicitly. 03616 * 03617 **************************************************************************** */ 03618 03619 static void fcode_starter( const char *token_name, int spread, bool is_offs16) 03620 { 03621 you_are_here(); 03622 if ( spread != 1 ) 03623 { 03624 tokenization_error( WARNING, "spread of %d not supported.\n", spread); 03625 } 03626 if ( fcode_started ) 03627 { 03628 tokenization_error( WARNING, 03629 "Only one \"FCode Starter\" permitted per tokenization. " 03630 "Ignoring...\n"); 03631 } else { 03632 03633 emit_fcodehdr(token_name); 03634 offs16 = is_offs16; 03635 fcode_started = TRUE; 03636 03637 current_device_node->ifile_name = strdup(iname); 03638 current_device_node->line_no = lineno; 03639 03640 if ( first_fc_starter ) 03641 { 03642 reset_fcode_ranges(); 03643 first_fc_starter = FALSE; 03644 }else{ 03645 set_next_fcode( nextfcode); 03646 } 03647 } 03648 } 03649 03650 /* ************************************************************************** 03651 * 03652 * Function name: fcode_end_err_check 03653 * Synopsis: Do error-checking at end of tokenization, 03654 * whether due to FCODE-END or end-of-file, 03655 * and reset the indicators we check. 03656 * 03657 * Inputs: 03658 * Parameters: NONE 03659 * Global Variables: 03660 * Data-Stack depth Is anything left on the stack? 03661 * 03662 * Outputs: 03663 * Returned Value: NONE 03664 * Global Variables: 03665 * Data-Stack Reset to empty 03666 * 03667 * Error Detection: 03668 * Unresolved control structures detected by clear_control_structs() 03669 * If anything is left on the stack, it indicates some incomplete 03670 * condition; we will treat it as a Warning. 03671 * 03672 **************************************************************************** */ 03673 03674 static void fcode_end_err_check( void) 03675 { 03676 bool stack_imbal = BOOLVAL( stackdepth() != 0 ); 03677 03678 if ( stack_imbal ) 03679 { 03680 tokenization_error( WARNING, 03681 "Stack imbalance before end of tokenization.\n"); 03682 } 03683 clear_stack(); 03684 clear_control_structs("End of tokenization"); 03685 } 03686 03687 /* ************************************************************************** 03688 * 03689 * Function name: fcode_ender 03690 * Synopsis: Respond to one of the "FCode Ender" words: 03691 * The FCode-token for "End0" or "End1" 03692 * has already been written to the 03693 * FCode Output buffer. 03694 * Finish the FCode header: fill in its 03695 * checksum and length. 03696 * Reset the token names defined in "normal" mode 03697 * (Does not reset the FCode-token number) 03698 * 03699 * Associated FORTH words: END0, END1 03700 * Associated Tokenizer directive: FCODE-END 03701 * 03702 * Inputs: 03703 * Parameters: NONE 03704 * Global Variables: 03705 * incolon If TRUE, a colon def'n has not been completed 03706 * last_colon_filename For error message. 03707 * last_colon_lineno For error message. 03708 * scope_is_global For error detection 03709 * is_instance For error detection 03710 * 03711 * Outputs: 03712 * Returned Value: NONE 03713 * Global Variables: 03714 * haveend Set to TRUE 03715 * fcode_started Reset to FALSE. Be ready to start anew. 03716 * FCode-defined tokens, aliases and macros -- i.e., those 03717 * *NOT* defined in tokenizer-escape mode -- are reset. 03718 * (Also, command-line-defined symbols are preserved). 03719 * Vocabularies will be reset 03720 * Device-node data structures will be deleted 03721 * Top-level device-node ifile_name and line_no fields 03722 * will be reset. 03723 * Memory Freed 03724 * Duplicate of Input-File name, in top-level device-node. 03725 * Printout: 03726 * Advisory message giving current value of nextfcode 03727 * (the "FCode-token Assignment Counter") 03728 * 03729 * Error Detection: 03730 * ERROR if a Colon definition has not been completed. 03731 * ERROR if "instance" is still in effect 03732 * WARNING if Global-Scope has not been terminated; compensate. 03733 * 03734 * Extraneous Remarks: 03735 * In order to accommodate odd cases, such as multiple FCode blocks 03736 * within a single PCI header, this routine does not automatically 03737 * reset nextfcode to h# 0800 03738 * 03739 **************************************************************************** */ 03740 03741 void fcode_ender(void) 03742 { 03743 if ( incolon ) 03744 { 03745 char *tmp_iname = iname; 03746 iname = last_colon_filename; 03747 unterm_is_colon = TRUE; 03748 warn_unterm( TKERROR, "Colon Definition", last_colon_lineno); 03749 iname = tmp_iname; 03750 } 03751 03752 haveend = TRUE; 03753 03754 if ( is_instance ) 03755 { 03756 unresolved_instance( TKERROR); 03757 } 03758 03759 if ( scope_is_global ) 03760 { 03761 tokenization_error( WARNING , 03762 "No DEVICE-DEFINITIONS directive encountered before end. " 03763 "Compensating...\n"); 03764 resume_device_scope(); 03765 } 03766 fcode_end_err_check(); 03767 reset_normal_vocabs(); 03768 finish_fcodehdr(); 03769 fcode_started = FALSE; 03770 03771 if ( current_device_node->ifile_name != default_top_dev_ifile_name ) 03772 { 03773 free( current_device_node->ifile_name ); 03774 current_device_node->ifile_name = default_top_dev_ifile_name; 03775 current_device_node->line_no = 0; 03776 } 03777 } 03778 03779 /* ************************************************************************** 03780 * 03781 * Function name: get_token 03782 * Synopsis: Read the next word in the input stream and retrieve 03783 * its FCode-token number. If it's not a symbol to 03784 * which a single token is assigned (e.g., if it's 03785 * a macro), report an error. 03786 * 03787 * Associated FORTH words: ['] ' 03788 * Associated Tokenizer directive: F['] 03789 * 03790 * Inputs: 03791 * Parameters: 03792 * *tok_entry Place to put the pointer to token entry 03793 * Global Variables: 03794 * statbuf The command being processed 03795 * pc Input stream character pointer 03796 * 03797 * Outputs: 03798 * Returned Value: TRUE if successful (i.e., no error) 03799 * Supplied Pointers: 03800 * *tok_entry The token entry, if no error 03801 * Global Variables: 03802 * statbuf The next word in the input stream 03803 * pc Restored to previous value if error 03804 * 03805 * Error Detection: 03806 * The next word in the input stream is expected to be on the 03807 * same line as the directive. The get_word_in_line() 03808 * routine will check for that. 03809 * If the next word in the input stream is not a symbol 03810 * for which a single-token FCode number is assigned, 03811 * report an ERROR and restore PC to its previous value. 03812 * 03813 **************************************************************************** */ 03814 03815 static bool get_token(tic_hdr_t **tok_entry) 03816 { 03817 bool retval = FALSE; 03818 u8 *save_pc; 03819 03820 /* Copy of command being processed, for error message */ 03821 char cmnd_cpy[FUNC_CPY_BUF_SIZE+1]; 03822 strncpy( cmnd_cpy, statbuf, FUNC_CPY_BUF_SIZE); 03823 cmnd_cpy[FUNC_CPY_BUF_SIZE] = 0; /* Guarantee null terminator. */ 03824 03825 save_pc = pc; 03826 03827 if ( get_word_in_line( statbuf) ) 03828 { 03829 fwtoken defr = UNSPECIFIED; 03830 03831 /* We need to scan the newest definitions first; they 03832 * might supercede standard ones. We need, though, 03833 * to bypass built-in FWords that need to trigger 03834 * some tokenizer internals before emitting their 03835 * synonymous FCode Tokens, (e.g., version1 , end0 , 03836 * and start{0-4}); if we find one of those, we will 03837 * need to search again, specifically within the list 03838 * of FCode Tokens. 03839 */ 03840 *tok_entry = lookup_with_definer( statbuf, &defr); 03841 if ( *tok_entry != NULL ) 03842 { 03843 /* Built-in FWords can be uniquely identified by their 03844 * definer, BI_FWRD_DEFN . The definer for "shared" 03845 * FWords is COMMON_FWORD but there are none of 03846 * those that might be synonymous with legitimate 03847 * FCode Tokens, nor are any likely ever to be... 03848 */ 03849 if ( defr == BI_FWRD_DEFN ) 03850 { 03851 *tok_entry = lookup_token( statbuf); 03852 retval = BOOLVAL( *tok_entry != NULL ); 03853 }else{ 03854 retval = entry_is_token( *tok_entry); 03855 } 03856 } 03857 03858 if ( INVERSE( retval) ) 03859 { 03860 cannot_apply( cmnd_cpy, strupr(statbuf), defr ); 03861 pc = save_pc; 03862 } 03863 } 03864 03865 return ( retval ); 03866 } 03867 03868 03869 static void base_change ( int new_base ) 03870 { 03871 if ( incolon && ( INVERSE( in_tokz_esc) ) ) 03872 { 03873 emit_literal(new_base ); 03874 emit_token("base"); 03875 emit_token("!"); 03876 } else { 03877 base = new_base; 03878 } 03879 } 03880 03881 static void base_val (int new_base) 03882 { 03883 u8 *old_pc; 03884 03885 char base_cmnd[FUNC_CPY_BUF_SIZE+1]; 03886 strncpy( base_cmnd, statbuf, FUNC_CPY_BUF_SIZE); 03887 base_cmnd[FUNC_CPY_BUF_SIZE] = 0; /* Guarantee NULL terminator */ 03888 03889 old_pc=pc; 03890 if ( get_word_in_line( statbuf) ) 03891 { 03892 u8 basecpy=base; 03893 03894 base = new_base; 03895 if ( ! handle_number() ) 03896 { 03897 /* We did get a word on the line, but it's not a valid number */ 03898 tokenization_error( WARNING , 03899 "Applying %s to non-numeric value. Ignoring.\n", 03900 strupr(base_cmnd) ); 03901 pc = old_pc; 03902 } 03903 base=basecpy; 03904 } 03905 } 03906 03907 03908 /* ************************************************************************** 03909 * 03910 * Function name: eval_string 03911 * Synopsis: Prepare to tokenize a string, artificially generated 03912 * by this program or created as a user-defined 03913 * Macro. When done, resume at existing source. 03914 * Keep the file-name and line-number unchanged. 03915 * 03916 * Inputs: 03917 * Parameters: 03918 * inp_bufr String (or buffer) to evaluate 03919 * 03920 * Outputs: 03921 * Returned Value: NONE 03922 * Global Variables, changed by call to init_inbuf(): 03923 * start Points to given string 03924 * pc ditto 03925 * end Points to end of given string 03926 * 03927 * Revision History: 03928 * Updated Thu, 23 Feb 2006 by David L. Paktor 03929 * This routine no longer calls its own instance of tokenize() 03930 * It has become the gateway to the mechanism that makes a 03931 * smooth transition between the body of the Macro, User- 03932 * defined Symbol or internally-generated string and the 03933 * resumption of processing the source file. 03934 * A similar (but more complicated) transition when processing 03935 * an FLOADed file will be handled elsewhere. 03936 * Updated Fri, 24 Feb 2006 by David L. Paktor 03937 * In order to support Macro-recursion protection, this routine 03938 * is no longer the gateway for Macros; they will have to 03939 * call push_source() directly. 03940 * 03941 **************************************************************************** */ 03942 03943 void eval_string( char *inp_bufr) 03944 { 03945 push_source( NULL, NULL, FALSE); 03946 init_inbuf( inp_bufr, strlen(inp_bufr)); 03947 } 03948 03949 03950 /* ************************************************************************** 03951 * 03952 * Function name: finish_or_new_device 03953 * Synopsis: Handle the shared logic for the NEW-DEVICE and 03954 * FINISH-DEVICE commands. 03955 * 03956 * Inputs: 03957 * Parameters: 03958 * finishing_device TRUE for FINISH-DEVICE, 03959 * FALSE for NEW-DEVICE 03960 * Global Variables: 03961 * incolon TRUE if inside a colon definition 03962 * noerrors TRUE if ignoring errors 03963 * scope_is_global TRUE if "global scope" in effect 03964 * Local Static Variables: 03965 * is_instance TRUE if "instance" is in effect 03966 * dev_change_instance_warning TRUE if warning hasn't been issued 03967 * 03968 * Outputs: 03969 * Returned Value: NONE 03970 * Local Static Variables: 03971 * dev_change_instance_warning FALSE if warning is issued 03972 * instance_definer_gap TRUE if "instance" is in effect 03973 * 03974 * Error Detection: 03975 * NEW-DEVICE and FINISH-DEVICE should not be used outside of 03976 * a colon-definition if global-scope is in effect. Error 03977 * message; no further action unless we are ignoring errors. 03978 * Issue a WARNING if INSTANCE wasn't resolved before the current 03979 * device-node is changed. Try not to be too repetitive... 03980 * 03981 * Process Explanation: 03982 * The words NEW-DEVICE and FINISH-DEVICE may be incorporated into 03983 * a colon-definition, whether the word is defined in global- 03984 * or device- -scope. Such an incorporation does not effect 03985 * a change in the device-node vocabulary; simply emit the token. 03986 * If we are in interpretation mode, though, we need to check for 03987 * errors before changing the device-node vocabulary: 03988 * If global-scope is in effect, we need to check whether we are 03989 * ignoring errors; if so, we will compensate by switching to 03990 * device-scope. 03991 * If "instance" is in effect, it's "dangling". It will remain 03992 * in effect through a device-node change, but this is very 03993 * bad style and deserves a WARNING, but only one for each 03994 * occurrence. It would be unaesthetic, to say the least, 03995 * to have multiple messages for the same dangling "instance" 03996 * in a "finish-device new-device" sequence. 03997 * We must be careful about the order we do things, because of 03998 * the messages printed as a side-effect of the node change... 03999 * 04000 * Extraneous Remarks: 04001 * I will violate strict structure here. 04002 * 04003 **************************************************************************** */ 04004 04005 static void finish_or_new_device( bool finishing_device ) 04006 { 04007 if ( INVERSE( incolon ) ) 04008 { 04009 if ( INVERSE( is_instance) ) 04010 { 04011 /* Arm warning for next time: */ 04012 dev_change_instance_warning = TRUE; 04013 }else{ 04014 /* Dangling "instance" */ 04015 instance_definer_gap = TRUE; 04016 /* Warn only once. */ 04017 if ( dev_change_instance_warning ) 04018 { 04019 unresolved_instance( WARNING); 04020 dev_change_instance_warning = FALSE; 04021 } 04022 } 04023 04024 /* Note: "Instance" cannot be in effect during "global" scope */ 04025 if ( scope_is_global ) 04026 { 04027 glob_not_allowed( TKERROR, noerrors ); 04028 if ( noerrors ) 04029 { 04030 resume_device_scope(); 04031 }else{ 04032 return; 04033 } 04034 } 04035 04036 if ( finishing_device ) 04037 { 04038 finish_device_vocab(); 04039 }else{ 04040 new_device_vocab(); 04041 } 04042 } 04043 emit_token( finishing_device ? "finish-device" : "new-device" ); 04044 } 04045 04046 04047 /* ************************************************************************** 04048 * 04049 * Function name: abort_quote 04050 * Synopsis: Optionally implement the ABORT" function as 04051 * though it were a macro. Control whether to allow 04052 * it, and which style to support, via switches set 04053 * on the command-line at run-time. 04054 * 04055 * Inputs: 04056 * Parameters: 04057 * tok Numeric-code associated with the 04058 * FORTH word that was just read. 04059 * Global Variables: 04060 * enable_abort_quote Whether to allow ABORT" 04061 * sun_style_abort_quote SUN-style versus Apple-style 04062 * abort_quote_throw Whether to use -2 THROW vs ABORT 04063 * 04064 * Outputs: 04065 * Returned Value: TRUE if it was handled 04066 * Global Variables: 04067 * report_multiline Reset to FALSE. 04068 * Printout: 04069 * ADVISORY: ABORT" in fcode is not defined by IEEE 1275-1994 04070 * 04071 * Error Detection: 04072 * Performed by other routines. If user selected not to 04073 * allow ABORT" , it will simply be treated as an 04074 * unknown word. 04075 * The string following it, however, will still be consumed. 04076 * 04077 * Process Explanation: 04078 * If the supplied tok was not ABORTTXT , then return FALSE. 04079 * If the enable_abort_quote flag is FALSE, consume the 04080 * string following the Abort" token, but be careful to 04081 * leave the Abort" token in statbuf, as it will be used 04082 * for the error message. 04083 * Otherwise, create and prepare for processing the appropriate Macro: 04084 * For Apple Style, we push the specified string onto the stack 04085 * and do -2 THROW (and hope the stack unwinds correctly). 04086 * For Sun Style, we test the condition on top of the stack, 04087 * and if it's true, print the specified string before we 04088 * do the -2 THROW. 04089 * We perform the underlying operations directly: placing an "IF" 04090 * (if Sun Style), then placing the string. This bypasses 04091 * any issues of double-parsing, as well as of doubly checking 04092 * for a multi-line string. 04093 * Finally, we perform the operational equivalents of the remainder 04094 * of the command sequence. 04095 * 04096 * Extraneous Remarks: 04097 * I would have preferred not to have to directly perform the under- 04098 * lying operations, and instead simply prepare the entire command 04099 * sequence in a buffer, but I needed to handle the case where 04100 * quote-escaped quotes are included in the string: If the string 04101 * were simply to be reproduced into the buffer, the quote-escaped 04102 * quotes would appear as plain quote-marks and terminate the 04103 * string parsing prematurely, leaving the rest of the string 04104 * to be treated as code instead of text... 04105 * Also, the introduction of the variability of whether to do the 04106 * -2 THROW or to compile-in the token for ABORT makes the 04107 * buffer-interpretation scheme somewhat too messy for my tastes. 04108 * 04109 **************************************************************************** */ 04110 04111 static bool abort_quote( fwtoken tok) 04112 { 04113 bool retval = FALSE; 04114 if ( tok == ABORTTXT ) 04115 { 04116 if ( ! enable_abort_quote ) 04117 { 04118 /* ABORT" is not enabled; we'd better consume the string */ 04119 char *save_statbuf; 04120 signed long wlen; 04121 save_statbuf = strdup( (char *)statbuf); 04122 wlen = get_string( FALSE); 04123 strcpy( statbuf, save_statbuf); 04124 free( save_statbuf); 04125 }else{ 04126 /* ABORT" is not to be used in FCODE drivers 04127 * but Apple drivers do use it. Therefore we 04128 * allow it. We push the specified string to 04129 * the stack, do -2 THROW and hope that THROW 04130 * will correctly unwind the stack. 04131 * Presumably, Apple Source supplies its own 04132 * IF ... THEN 04133 */ 04134 char *abort_string; 04135 signed long wlen; 04136 04137 retval = TRUE; 04138 tokenization_error (INFO, "ABORT\" in fcode not " 04139 "defined by IEEE 1275-1994\n"); 04140 test_in_colon("ABORT\"", TRUE, TKERROR, NULL); 04141 wlen=get_string( TRUE); 04142 04143 if ( sun_style_abort_quote ) emit_if(); 04144 04145 emit_token("b(\")"); 04146 emit_string(statbuf, wlen); 04147 04148 if ( sun_style_abort_quote ) emit_token("type"); 04149 04150 if ( abort_quote_throw ) 04151 { 04152 emit_literal( -2); 04153 emit_token("throw"); 04154 }else{ 04155 emit_token("abort"); 04156 } 04157 04158 if ( sun_style_abort_quote ) emit_then(); 04159 /* Sun Style */ 04160 abort_string = " type -2 THROW THEN:" ; 04161 } 04162 } 04163 return( retval ); 04164 } 04165 04166 04167 /* ************************************************************************** 04168 * 04169 * Function name: create_alias 04170 * Synopsis: Create an alias, as specified by the user 04171 * 04172 * Associated FORTH word: ALIAS 04173 * 04174 * Inputs: 04175 * Parameters: NONE 04176 * Global Variables: 04177 * incolon Colon-def'n-in-progress indicator 04178 * in_tokz_esc "Tokenizer Escape" mode indicator 04179 * Input Stream 04180 * Two words will be read. 04181 * 04182 * Outputs: 04183 * Returned Value: TRUE if succeeded. 04184 * Global Variables: 04185 * statbuf New name will be copied back into here. 04186 * Memory Allocated 04187 * The two words will be copied into freshly-allocated memory 04188 * that will be passed to the create_..._alias() routine. 04189 * When Freed? 04190 * When Current Device Vocabulary is "finished", or at end 04191 * of tokenization, or upon termination of program. 04192 * If not able to create alias, the copies will be freed here. 04193 * 04194 * Error Detection: 04195 * If the ALIAS command was given during colon-definition, that 04196 * can be handled by this tokenizer, but it is not supported 04197 * by IEEE 1275-1994. Issue a WARNING. 04198 * If the new name is a copy of an existing word-name, issue a warning. 04199 * If the word to which an alias is to be created does not exist 04200 * in the appropriate mode -- relative to "Tokenizer-Escape" -- 04201 * that is an ERROR. 04202 * If "instance" is in effect, the ALIAS command is an ERROR. 04203 * 04204 * Process Explanation: 04205 * Get two words -- the new name and the "old" word -- from the 04206 * same line of input as the ALIAS command. 04207 * Copy the new name back into statbuf for use in trace_creation. 04208 * Determine whether or not we are in "Tokenizer-Escape" mode. 04209 * Subsequent searches will take place in that same mode. 04210 * If the "new" name already exists, issue a warning. 04211 * In each vocabulary applicable to the current mode -- i.e., 04212 * "Tokenizer-Escape" or "Normal" -- (except: cannot 04213 * make aliases to "Locals"): 04214 * Try using the create_..._alias() routine. 04215 * If it succeeds, we are done. 04216 * IMPORTANT: The order in which we try the vocabularies MUST 04217 * match the order in which tokenize_one_word() searches them. 04218 * If all the attempts failed, the "old" word does not exist; 04219 * declare an ERROR and free up the memory that was allocated. 04220 * 04221 * Extraneous Remarks: 04222 * With the separation of the tokenizer[ state, this 04223 * function has become too complicated to keep as a 04224 * simple CASE in the big SWITCH statement anymore... 04225 * 04226 * I had earlier thought that it was sufficient to create a 04227 * macro linking the "new" name to the "old" word. There 04228 * were too many cases, though, where that didn't work. 04229 * This is cleaner. 04230 * 04231 * I will not be adhering to the strict rules of structure in 04232 * this routine, as it would get me nested too deeply... 04233 * 04234 * Revision History: 04235 * Updated Tue, 10 Jan 2006 by David L. Paktor 04236 * Convert to tic_hdr_t type vocabularies. 04237 * 04238 **************************************************************************** */ 04239 04240 static bool create_alias( void ) 04241 { 04242 char *new_alias ; 04243 04244 validate_instance(ALIAS); 04245 if ( incolon ) 04246 { 04247 tokenization_error ( WARNING, 04248 "ALIAS during colon-definition " 04249 "is not supported by IEEE 1275-1994\n"); 04250 } 04251 if ( get_word_in_line( "ALIAS") ) 04252 { 04253 04254 new_alias = strdup((char *)statbuf); 04255 04256 if (get_word_in_line( "ALIAS") ) 04257 { 04258 char *old_name = strdup((char *)statbuf) ; 04259 04260 /* Copy the "new" alias name back into statbuf. 04261 * This is a HACK ^H^H^H^H awkward way to retrofit 04262 * support for the trace_creation() function. 04263 */ 04264 strcpy( statbuf, new_alias); 04265 04266 /* We don't call trace_creation() here because we don't 04267 * know if the creation succeeded. However, we want 04268 * to issue a "Duplicate" warning based on the attempt, 04269 * even if it doesn't succeed. 04270 * We would prefer to have the "Trace" message precede the 04271 * "Duplicate" warning, but we don't think it's worth 04272 * the effort. When it becomes worthwhile, the way to 04273 * do it would be to factor out the block that handles 04274 * normal-tokenization versus "Tokenizer-Escape" mode; 04275 * condition the "Trace" message on its success-return, 04276 * show the "Duplicate" warning in any case, then show 04277 * the error-message and do the cleanup conditioned on 04278 * a failure-return. 04279 * That will also obviate the need for a return value from 04280 * this routine and for the copy-back into statbuf. 04281 */ 04282 warn_if_duplicate(new_alias); 04283 04284 /* 04285 * Here is where we begin trying the create_..._alias() 04286 * routines for the vocabularies. 04287 */ 04288 04289 /* 04290 * Distinguish between "Normal" tokenization mode 04291 * and "Tokenizer Escape" mode 04292 */ 04293 if ( in_tokz_esc ) 04294 { 04295 if ( create_tokz_esc_alias( new_alias, old_name) ) 04296 return(TRUE); 04297 04298 /* 04299 * Handle the classes of operatives that are common between 04300 * "Tokenizer Escape" mode and "Normal" tokenization mode. 04301 * Those classes include selected non-fcode forth constructs 04302 * and Conditional-Compilation Operators. 04303 */ 04304 { 04305 tic_hdr_t *found = lookup_shared_word( old_name); 04306 if ( found != NULL ) 04307 { 04308 if ( create_core_alias( new_alias, old_name) ) 04309 return(TRUE); 04310 } 04311 } 04312 }else{ 04313 /* "Normal" tokenization mode */ 04314 04315 /* Can create aliases for "Locals", why not? */ 04316 if ( create_local_alias( new_alias, old_name) ) 04317 return(TRUE); 04318 04319 /* 04320 * All other classes of operatives -- non-fcode forth 04321 * constructs, Standard and user-defined fcode 04322 * tokens, Macros, and Conditional-Compilation 04323 * Operators, -- are included in the "currently 04324 * active" vocabulary. 04325 */ 04326 04327 if ( create_current_alias( new_alias, old_name) ) 04328 return(TRUE); 04329 04330 } /* End of separate handling for normal-tokenization mode 04331 * versus "Tokenizer-Escape" mode 04332 */ 04333 04334 /* It's not a word, a macro or any of that other stuff. */ 04335 tokenized_word_error(old_name); 04336 free(old_name); 04337 } 04338 free (new_alias); 04339 } 04340 return(FALSE); 04341 } 04342 04343 04344 /* ************************************************************************** 04345 * 04346 * Function name: string_err_check 04347 * Synopsis: Error-check after processing or Ignoring 04348 * simple strings 04349 * 04350 * Inputs: 04351 * Parameters: 04352 * is_paren TRUE if string is Dot-Paren .( 04353 * FALSE if Ess-Quote ( s" ) 04354 * sav_lineno Saved Line Number, for Unterminated Error 04355 * strt_lineno Start Line Number, for Multiline Warning 04356 * Global Variables: 04357 * noerrors TRUE if ignoring errors 04358 * Local Static Variables: 04359 * got_until_eof TRUE if reached end of buffer before delim. 04360 * 04361 * Outputs: 04362 * Returned Value: TRUE if did not reach end of buffer, or, 04363 * if ignoring errors, TRUE anyway. 04364 * 04365 * Error Detection: 04366 * Multi-line warning, "Unterminated" Error messages, as apppropriate 04367 * 04368 **************************************************************************** */ 04369 04370 static bool string_err_check( bool is_paren, 04371 unsigned int sav_lineno, 04372 unsigned int strt_lineno ) 04373 { 04374 bool retval = noerrors ; 04375 char *item_typ = is_paren ? 04376 "Dot-Paren" : "Ess-Quote" ; 04377 if ( got_until_eof ) /* Crude retrofit... */ 04378 { 04379 warn_unterm( TKERROR, item_typ, sav_lineno ); 04380 }else{ 04381 retval = TRUE; 04382 warn_if_multiline( item_typ, strt_lineno ); 04383 } 04384 return( retval); 04385 } 04386 04387 04388 /* ************************************************************************** 04389 * 04390 * Function name: handle_internal 04391 * Synopsis: Perform the functions associated with FORTH words 04392 * that do not map directly to a single token. This 04393 * is the functions that will go into the FUNCT field 04394 * of entries in the "FWords" and "Shared Words" lists. 04395 * 04396 * Inputs: 04397 * Parameters: 04398 * pfield Param-field of the tic_hdr_t -type entry 04399 * associated with the FORTH-Word (FWord) 04400 * just read that is being "handled". 04401 * Global Variables: 04402 * statbuf The word that was just read. 04403 * 04404 * Outputs: 04405 * Returned Value: NONE 04406 * Global Variables: 04407 * statbuf More words may be read. 04408 * 04409 * Error Detection: 04410 * Too numerous to list here... 04411 * 04412 * Process Explanation: 04413 * Recast the type of the param-field of a tic_hdr_t -type 04414 * entry and rename it "tok". 04415 * The "tok" will be used as the control-expression for a 04416 * SWITCH statement with a large number of CASE labels. 04417 * Both "FWords" and "shared_words" list entries will 04418 * be processed by this routine. 04419 * 04420 * Revision History: 04421 * Updated Wed, 20 Jul 2005 by David L. Paktor 04422 * Put handling of ABORT" under control of a run-time 04423 * command-line switch. 04424 * Put decision to support IBM-style Locals under control 04425 * of a run-time command-line switch. 04426 * Updated Tue, 17 Jan 2006 by David L. Paktor 04427 * Convert to handler for tic_hdr_t type vocab entries. 04428 * 04429 * Extraneous Remarks: 04430 * We would prefer to keep this function private, so we will 04431 * declare its prototype here and in the one other file 04432 * where we need it, namely, dictionary.c, rather than 04433 * exporting it widely in a .h file. 04434 * 04435 **************************************************************************** */ 04436 04437 void handle_internal( tic_param_t pfield); 04438 void handle_internal( tic_param_t pfield) 04439 { 04440 fwtoken tok = pfield.fw_token; 04441 04442 signed long wlen; 04443 unsigned int sav_lineno = lineno; /* For error message */ 04444 04445 bool handy_toggle = TRUE ; /* Various uses... */ 04446 04447 #ifdef DEBUG_SCANNER 04448 printf("%s:%d: debug: tokenizing control word '%s'\n", 04449 iname, lineno, statbuf); 04450 #endif 04451 switch (tok) { 04452 case BEGIN: 04453 emit_begin(); 04454 break; 04455 04456 case BUFFER: 04457 if ( create_word(tok) ) 04458 { 04459 emit_token("b(buffer:)"); 04460 } 04461 break; 04462 04463 case CONST: 04464 if ( create_word(tok) ) 04465 { 04466 emit_token("b(constant)"); 04467 } 04468 break; 04469 04470 case COLON: 04471 { 04472 /* Collect error- -detection or -reporting items, 04473 * but don't commit until we're sure the 04474 * creation was a success. 04475 */ 04476 u16 maybe_last_colon_fcode = nextfcode ; 04477 unsigned int maybe_last_colon_lineno = lineno; 04478 unsigned int maybe_last_colon_abs_token_no = abs_token_no; 04479 unsigned int maybe_last_colon_do_depth = do_loop_depth; 04480 /* last_colon_defname 04481 * has to wait until after call to create_word() 04482 */ 04483 04484 if ( create_word(tok) ) 04485 { 04486 last_colon_fcode = maybe_last_colon_fcode; 04487 last_colon_lineno = maybe_last_colon_lineno; 04488 last_colon_abs_token_no = maybe_last_colon_abs_token_no; 04489 last_colon_do_depth = maybe_last_colon_do_depth; 04490 collect_input_filename( &last_colon_filename); 04491 /* Now we can get last_colon_defname */ 04492 if ( last_colon_defname != NULL ) 04493 { 04494 free( last_colon_defname); 04495 } 04496 last_colon_defname = strdup(statbuf); 04497 04498 emit_token("b(:)"); 04499 incolon=TRUE; 04500 hide_last_colon(); 04501 lastcolon = opc; 04502 } 04503 } 04504 break; 04505 04506 case SEMICOLON: 04507 if ( test_in_colon("SEMICOLON", TRUE, TKERROR, NULL) ) 04508 { 04509 ret_stk_balance_rpt( "termination,", TRUE); 04510 /* Clear Control Structures just back to where 04511 * the current Colon-definition began. 04512 */ 04513 clear_control_structs_to_limit( 04514 "End of colon-definition", last_colon_abs_token_no); 04515 04516 if ( ibm_locals ) 04517 { 04518 finish_locals(); 04519 forget_locals(); 04520 } 04521 04522 emit_token("b(;)"); 04523 incolon=FALSE; 04524 reveal_last_colon(); 04525 } 04526 break; 04527 04528 case CREATE: 04529 if ( create_word(tok) ) 04530 { 04531 emit_token("b(create)"); 04532 } 04533 break; 04534 04535 case DEFER: 04536 if ( create_word(tok) ) 04537 { 04538 emit_token("b(defer)"); 04539 } 04540 break; 04541 04542 case ALLOW_MULTI_LINE: 04543 report_multiline = FALSE; 04544 break; 04545 04546 case OVERLOAD: 04547 if ( test_in_colon(statbuf, FALSE, WARNING, NULL) ) 04548 { 04549 do_not_overload = FALSE; 04550 } 04551 break; 04552 04553 case DEFINED: 04554 if (get_word_in_line( statbuf) ) 04555 { 04556 eval_user_symbol(statbuf); 04557 } 04558 break; 04559 04560 case CL_FLAG: 04561 if (get_word_in_line( statbuf) ) 04562 { 04563 set_cl_flag( statbuf, TRUE); 04564 } 04565 break; 04566 04567 case SHOW_CL_FLAGS: 04568 show_all_cl_flag_settings( TRUE); 04569 break; 04570 04571 case FIELD: 04572 if ( create_word(tok) ) 04573 { 04574 emit_token("b(field)"); 04575 } 04576 break; 04577 04578 case VALUE: 04579 if ( create_word(tok) ) 04580 { 04581 emit_token("b(value)"); 04582 } 04583 break; 04584 04585 case VARIABLE: 04586 if ( create_word(tok) ) 04587 { 04588 emit_token("b(variable)"); 04589 } 04590 break; 04591 04592 case AGAIN: 04593 emit_again(); 04594 break; 04595 04596 case ALIAS: 04597 if ( create_alias() ) 04598 { 04599 trace_creation( ALIAS, statbuf); 04600 } 04601 break; 04602 04603 case CONTROL: 04604 if ( get_word_in_line( statbuf) ) 04605 { 04606 emit_literal(statbuf[0]&0x1f); 04607 } 04608 break; 04609 04610 case DO: 04611 emit_token("b(do)"); 04612 mark_do(); 04613 break; 04614 04615 case CDO: 04616 emit_token("b(?do)"); 04617 mark_do(); 04618 break; 04619 04620 case ELSE: 04621 emit_else(); 04622 break; 04623 04624 case CASE: 04625 emit_case(); 04626 break; 04627 04628 case ENDCASE: 04629 emit_endcase(); 04630 break; 04631 04632 case NEW_DEVICE: 04633 handy_toggle = FALSE; 04634 case FINISH_DEVICE: 04635 finish_or_new_device( handy_toggle ); 04636 break; 04637 04638 case FLITERAL: 04639 { 04640 u32 val; 04641 val = dpop(); 04642 emit_literal(val); 04643 } 04644 break; 04645 04646 case OF: 04647 emit_of(); 04648 break; 04649 04650 case ENDOF: 04651 emit_endof(); 04652 break; 04653 04654 case EXTERNAL: 04655 set_hdr_flag( FLAG_EXTERNAL ); 04656 break; 04657 04658 case HEADERLESS: 04659 set_hdr_flag( FLAG_HEADERLESS ); 04660 break; 04661 04662 case HEADERS: 04663 set_hdr_flag( FLAG_HEADERS ); 04664 break; 04665 04666 case DECIMAL: 04667 /* in a definition this is expanded as macro "10 base !" */ 04668 base_change ( 0x0a ); 04669 break; 04670 04671 case HEX: 04672 base_change ( 0x10 ); 04673 break; 04674 04675 case OCTAL: 04676 base_change ( 0x08 ); 04677 break; 04678 04679 case OFFSET16: 04680 if (!offs16) 04681 { 04682 tokenization_error(INFO, "Switching to 16-bit offsets.\n"); 04683 }else{ 04684 tokenization_error(WARNING, 04685 "Call of OFFSET16 is redundant.\n"); 04686 } 04687 emit_token("offset16"); 04688 offs16=TRUE; 04689 break; 04690 04691 case IF: 04692 emit_if(); 04693 break; 04694 04695 /* ************************************************************************** 04696 * 04697 * Still to be done: 04698 * Correct analysis of Return-Stack usage within Do-Loops 04699 * or before Loop Elements like I and J or UNLOOP or LEAVE. 04700 * 04701 **************************************************************************** */ 04702 case UNLOOP: 04703 emit_token("unloop"); 04704 must_be_deep_in_do(1); 04705 break; 04706 04707 case LEAVE: 04708 emit_token("b(leave)"); 04709 must_be_deep_in_do(1); 04710 break; 04711 04712 case LOOP_I: 04713 emit_token("i"); 04714 must_be_deep_in_do(1); 04715 break; 04716 04717 case LOOP_J: 04718 emit_token("j"); 04719 must_be_deep_in_do(2); 04720 break; 04721 04722 case LOOP: 04723 emit_token("b(loop)"); 04724 resolve_loop(); 04725 break; 04726 04727 case PLUS_LOOP: 04728 emit_token("b(+loop)"); 04729 resolve_loop(); 04730 break; 04731 04732 04733 case INSTANCE: 04734 { 04735 bool set_instance_state = FALSE; 04736 bool emit_instance = TRUE; 04737 /* We will treat "instance" in a colon-definition as 04738 * an error, but allow it to be emitted if we're 04739 * ignoring errors; if we're not ignoring errors, 04740 * there's no output anyway... 04741 */ 04742 if ( test_in_colon(statbuf, FALSE, TKERROR, NULL) ) 04743 { /* We are in interpretation (not colon) state. */ 04744 /* "Instance" not allowed during "global" scope */ 04745 if ( scope_is_global ) 04746 { 04747 glob_not_allowed( WARNING, FALSE ); 04748 emit_instance = FALSE; 04749 }else{ 04750 set_instance_state = TRUE; 04751 } 04752 } 04753 if ( emit_instance ) 04754 { 04755 if ( set_instance_state ) 04756 { 04757 /* "Instance" isn't cumulative.... */ 04758 if ( is_instance ) 04759 { 04760 unresolved_instance( WARNING); 04761 } 04762 collect_input_filename( &instance_filename); 04763 instance_lineno = lineno; 04764 is_instance = TRUE; 04765 dev_change_instance_warning = TRUE; 04766 } 04767 emit_token("instance"); 04768 } 04769 } 04770 break; 04771 04772 case GLOB_SCOPE: 04773 if ( test_in_colon(statbuf, FALSE, TKERROR, NULL) ) 04774 { 04775 if ( INVERSE( is_instance) ) 04776 { 04777 enter_global_scope(); 04778 }else{ 04779 tokenization_error( TKERROR, 04780 "Global Scope not allowed. " 04781 "\"Instance\" is in effect; issued" ); 04782 just_where_started( instance_filename, 04783 instance_lineno ); 04784 } 04785 } 04786 break; 04787 04788 case DEV_SCOPE: 04789 if ( test_in_colon(statbuf, FALSE, TKERROR, NULL) ) 04790 { 04791 resume_device_scope(); 04792 } 04793 break; 04794 04795 case TICK: /* ' */ 04796 test_in_colon(statbuf, FALSE, WARNING, "[']"); 04797 case BRACK_TICK: /* ['] */ 04798 { 04799 tic_hdr_t *token_entry; 04800 if ( get_token( &token_entry) ) 04801 { 04802 emit_token("b(')"); 04803 /* Emit the token; warning or whatever comes gratis */ 04804 token_entry->funct( token_entry->pfield); 04805 } 04806 } 04807 break; 04808 04809 case F_BRACK_TICK: /* F['] <name> 04810 * emits the token-number for <name> 04811 * Mainly useful to compute the argument 04812 * to get-token or set-token 04813 */ 04814 { 04815 tic_hdr_t *token_entry; 04816 if ( get_token( &token_entry) ) 04817 { 04818 /* "Obsolete" warning doesn't come gratis here... */ 04819 token_entry_warning( token_entry); 04820 /* In Tokenizer-Escape mode, push the token */ 04821 if ( in_tokz_esc ) 04822 { 04823 dpush( token_entry->pfield.deflt_elem); 04824 }else{ 04825 emit_literal( token_entry->pfield.deflt_elem); 04826 } 04827 } 04828 } 04829 break; 04830 04831 case CHAR: 04832 handy_toggle = FALSE; 04833 case CCHAR: 04834 test_in_colon(statbuf, handy_toggle, WARNING, 04835 handy_toggle ? "CHAR" : "[CHAR]" ); 04836 case ASCII: 04837 if ( get_word_in_line( statbuf) ) 04838 { 04839 emit_literal(statbuf[0]); 04840 } 04841 break; 04842 04843 case UNTIL: 04844 emit_until(); 04845 break; 04846 04847 case WHILE: 04848 emit_while(); 04849 break; 04850 04851 case REPEAT: 04852 emit_repeat(); 04853 break; 04854 04855 case THEN: 04856 emit_then(); 04857 break; 04858 04859 case IS: 04860 tokenization_error ( INFO, 04861 "Substituting TO for deprecated IS\n"); 04862 case TO: 04863 if ( validate_to_target() ) 04864 { 04865 emit_token("b(to)"); 04866 } 04867 break; 04868 04869 case FLOAD: 04870 if ( get_word_in_line( statbuf) ) 04871 { 04872 bool stream_ok ; 04873 04874 push_source( close_stream, NULL, TRUE) ; 04875 04876 tokenization_error( INFO, "FLOADing %s\n", statbuf ); 04877 04878 stream_ok = init_stream( statbuf ); 04879 if ( INVERSE( stream_ok) ) 04880 { 04881 drop_source(); 04882 } 04883 } 04884 break; 04885 04886 case STRING: /* Double-Quote ( " ) string */ 04887 handy_toggle = FALSE; 04888 case PSTRING: /* Dot-Quote ( ." ) string */ 04889 wlen=get_string( TRUE); 04890 emit_token("b(\")"); 04891 emit_string(statbuf, wlen); 04892 if ( handy_toggle ) 04893 { 04894 emit_token("type"); 04895 } 04896 break; 04897 04898 case SSTRING: /* Ess-Quote ( s" ) string */ 04899 handy_toggle = FALSE; 04900 case PBSTRING: /* Dot-Paren .( string */ 04901 if (*pc++=='\n') lineno++; 04902 { 04903 unsigned int strt_lineno = lineno; 04904 wlen = get_until( handy_toggle ? ')' : '"' ); 04905 if ( string_err_check( handy_toggle, 04906 sav_lineno, strt_lineno) ) 04907 { 04908 emit_token("b(\")"); 04909 emit_string(statbuf, wlen); 04910 if ( handy_toggle ) 04911 { 04912 emit_token("type"); 04913 } 04914 } 04915 } 04916 break; 04917 04918 case FUNC_NAME: 04919 if ( test_in_colon( statbuf, TRUE, TKERROR, NULL) ) 04920 { 04921 if ( in_tokz_esc ) 04922 { 04923 tokenization_error( P_MESSAGE, "Currently" ); 04924 in_last_colon(); 04925 }else{ 04926 emit_token("b(\")"); 04927 emit_string( last_colon_defname, 04928 strlen( last_colon_defname) ); 04929 /* if ( hdr_flag == FLAG_HEADERLESS ) { WARNING } */ 04930 } 04931 } 04932 break; 04933 04934 case IFILE_NAME: 04935 emit_token("b(\")"); 04936 emit_string( iname, strlen( iname) ); 04937 break; 04938 04939 case ILINE_NUM: 04940 emit_literal( lineno); 04941 break; 04942 04943 case HEXVAL: 04944 base_val (0x10); 04945 break; 04946 04947 case DECVAL: 04948 base_val (0x0a); 04949 break; 04950 04951 case OCTVAL: 04952 base_val (8); 04953 break; 04954 04955 case ASC_LEFT_NUM: 04956 handy_toggle = FALSE; 04957 case ASC_NUM: 04958 if (get_word_in_line( statbuf) ) 04959 { 04960 if ( handy_toggle ) 04961 { 04962 ascii_right_number( statbuf); 04963 } else { 04964 ascii_left_number( statbuf); 04965 } 04966 } 04967 break; 04968 04969 case CONDL_ENDER: /* Conditional directives out of context */ 04970 case CONDL_ELSE: 04971 tokenization_error ( TKERROR, 04972 "No conditional preceding %s directive\n", 04973 strupr(statbuf) ); 04974 break; 04975 04976 case PUSH_FCODE: 04977 tokenization_error( INFO, 04978 "FCode-token Assignment Counter of 0x%x " 04979 "has been saved on stack.\n", nextfcode ); 04980 dpush( (long)nextfcode ); 04981 break; 04982 04983 case POP_FCODE: 04984 pop_next_fcode(); 04985 break; 04986 04987 case RESET_FCODE: 04988 tokenization_error( INFO, 04989 "Encountered %s. Resetting FCode-token " 04990 "Assignment Counter. ", strupr(statbuf) ); 04991 list_fcode_ranges( FALSE); 04992 reset_fcode_ranges(); 04993 break; 04994 04995 case EXIT: 04996 if ( test_in_colon( statbuf, TRUE, TKERROR, NULL) 04997 || noerrors ) 04998 { 04999 ret_stk_balance_rpt( NULL, FALSE); 05000 if ( ibm_locals ) 05001 { 05002 finish_locals (); 05003 } 05004 emit_token("exit"); 05005 } 05006 break; 05007 05008 case ESCAPETOK: 05009 enter_tokz_esc(); 05010 break; 05011 05012 case VERSION1: 05013 case FCODE_V1: 05014 tokenization_error( INFO, "Using version1 header " 05015 "(8-bit offsets).\n"); 05016 fcode_starter( "version1", 1, FALSE) ; 05017 break; 05018 05019 case START1: 05020 case FCODE_V2: 05021 case FCODE_V3: /* Full IEEE 1275 */ 05022 fcode_starter( "start1", 1, TRUE); 05023 break; 05024 05025 case START0: 05026 fcode_starter( "start0", 0, TRUE); 05027 break; 05028 05029 case START2: 05030 fcode_starter( "start2", 2, TRUE); 05031 break; 05032 05033 case START4: 05034 fcode_starter( "start4", 4, TRUE); 05035 break; 05036 05037 case END1: 05038 tokenization_error( WARNING, 05039 "Appearance of END1 in FCode source code " 05040 "is not intended by IEEE 1275-1994\n"); 05041 handy_toggle = FALSE; 05042 case END0: 05043 case FCODE_END: 05044 if ( handy_toggle ) 05045 { 05046 you_are_here(); 05047 } 05048 emit_token( handy_toggle ? "end0" : "end1" ); 05049 fcode_ender(); 05050 FFLUSH_STDOUT 05051 break; 05052 05053 case RECURSE: 05054 if ( test_in_colon(statbuf, TRUE, TKERROR, NULL ) ) 05055 { 05056 emit_fcode(last_colon_fcode); 05057 } 05058 break; 05059 05060 05061 case RECURSIVE: 05062 if ( test_in_colon(statbuf, TRUE, TKERROR, NULL ) ) 05063 { 05064 reveal_last_colon(); 05065 } 05066 break; 05067 05068 case RET_STK_FETCH: 05069 ret_stk_access_rpt(); 05070 emit_token( "r@"); 05071 break; 05072 05073 case RET_STK_FROM: 05074 ret_stk_access_rpt(); 05075 bump_ret_stk_depth( -1); 05076 emit_token( "r>"); 05077 break; 05078 05079 case RET_STK_TO: 05080 bump_ret_stk_depth( 1); 05081 emit_token( ">r"); 05082 break; 05083 05084 case PCIHDR: 05085 emit_pcihdr(); 05086 break; 05087 05088 case PCIEND: 05089 finish_pcihdr(); 05090 reset_fcode_ranges(); 05091 FFLUSH_STDOUT 05092 break; 05093 05094 case PCIREV: 05095 pci_image_rev = dpop(); 05096 tokenization_error( INFO, 05097 "PCI header revision=0x%04x%s\n", pci_image_rev, 05098 big_end_pci_image_rev ? 05099 ". Will be saved in Big-Endian format." 05100 : "" ); 05101 break; 05102 05103 case NOTLAST: 05104 handy_toggle = FALSE; 05105 case ISLAST: 05106 dpush(handy_toggle); 05107 case SETLAST: 05108 { 05109 u32 val = dpop(); 05110 bool new_pili = BOOLVAL( (val != 0) ); 05111 if ( pci_is_last_image != new_pili ) 05112 { 05113 tokenization_error( INFO, 05114 new_pili ? 05115 "Last image for PCI header.\n" : 05116 "PCI header not last image.\n" ); 05117 pci_is_last_image = new_pili; 05118 } 05119 } 05120 break; 05121 05122 case SAVEIMG: 05123 if (get_word_in_line( statbuf) ) 05124 { 05125 free(oname); 05126 oname = strdup( statbuf ); 05127 tokenization_error( INFO, 05128 "Output is redirected to file: %s\n", oname); 05129 } 05130 break; 05131 05132 case RESETSYMBS: 05133 tokenization_error( INFO, 05134 "Resetting symbols defined in %s mode.\n", 05135 in_tokz_esc ? "tokenizer-escape" : "\"normal\""); 05136 if ( in_tokz_esc ) 05137 { 05138 reset_tokz_esc(); 05139 }else{ 05140 reset_normal_vocabs(); 05141 } 05142 break; 05143 05144 case FCODE_DATE: 05145 handy_toggle = FALSE; 05146 case FCODE_TIME: 05147 { 05148 time_t tt; 05149 char temp_buffr[32]; 05150 05151 tt=time(NULL); 05152 if ( handy_toggle ) 05153 { 05154 strftime(temp_buffr, 32, "%T %Z", localtime(&tt)); 05155 }else{ 05156 strftime(temp_buffr, 32, "%m/%d/%Y", localtime(&tt)); 05157 } 05158 if ( in_tokz_esc ) 05159 { 05160 tokenization_error( MESSAGE, temp_buffr); 05161 }else{ 05162 emit_token("b(\")"); 05163 emit_string((u8 *)temp_buffr, strlen(temp_buffr) ); 05164 } 05165 } 05166 break; 05167 05168 case ENCODEFILE: 05169 if (get_word_in_line( statbuf) ) 05170 { 05171 encode_file( (char*)statbuf ); 05172 } 05173 break; 05174 05175 default: 05176 /* IBM-style Locals, under control of a switch */ 05177 if ( ibm_locals ) 05178 { 05179 bool found_it = TRUE; 05180 switch (tok) { 05181 case CURLY_BRACE: 05182 declare_locals( FALSE); 05183 break; 05184 case DASH_ARROW: 05185 assign_local(); 05186 break; 05187 default: 05188 found_it = FALSE; 05189 } 05190 if ( found_it ) break; 05191 } 05192 05193 /* Down here, we have our last chance to recognize a token. 05194 * If abort_quote is disallowed, we will still consume 05195 * the string. In case the string spans more than one 05196 * line, we want to make sure the line number displayed 05197 * in the error-message is the one on which the disallowed 05198 * abort_quote token appeared, not the one where the 05199 * string ended; therefore, we might need to be able to 05200 * "fake-out" the line number... 05201 */ 05202 { 05203 bool fake_out_lineno = FALSE; 05204 unsigned int save_lineno = lineno; 05205 unsigned int true_lineno; 05206 if ( abort_quote( tok) ) 05207 { break; 05208 }else{ 05209 if ( tok == ABORTTXT ) fake_out_lineno = TRUE; 05210 } 05211 true_lineno = lineno; 05212 05213 if ( fake_out_lineno ) lineno = save_lineno; 05214 tokenization_error ( TKERROR, 05215 "Unimplemented control word '%s'\n", strupr(statbuf) ); 05216 if ( fake_out_lineno ) lineno = true_lineno; 05217 } 05218 } 05219 } 05220 05221 /* ************************************************************************** 05222 * 05223 * Function name: skip_string 05224 * Synopsis: When Ignoring, skip various kinds of strings. Maps 05225 * to string-handlers in handle_internal()... 05226 * 05227 * Associated FORTH words: Double-Quote ( " ) string 05228 * Dot-Quote ( ." ) string 05229 * Ess-Quote ( s" ) string 05230 * Dot-Paren .( string 05231 * ABORT" (even if not enabled) 05232 * { (Local-Values declaration) and -> (Local-Values assignment) 05233 * are also handled if ibm_locals is enabled. 05234 * 05235 * Inputs: 05236 * Parameters: 05237 * pfield Param-field of the entry associated with 05238 * the FWord that is being Ignored. 05239 * Global Variables: 05240 * statbuf The word that was just read. 05241 * pc Input-stream pointer 05242 * lineno Line-number, used for errors and warnings 05243 * ibm_locals TRUE if IBM-style Locals are enabled 05244 * 05245 * Outputs: 05246 * Returned Value: NONE 05247 * 05248 * Error Detection: 05249 * Multi-line warnings, "Unterminated" Errors 05250 * handled by called routines 05251 * 05252 * Extraneous Remarks: 05253 * We would prefer to keep this function private, too, so we 05254 * will declare its prototype here and in the one other 05255 * file where we need it, namely, dictionary.c, rather 05256 * than exporting it widely in a .h file. 05257 * 05258 **************************************************************************** */ 05259 05260 void skip_string( tic_param_t pfield); 05261 void skip_string( tic_param_t pfield) 05262 { 05263 fwtoken tok = pfield.fw_token; 05264 unsigned int sav_lineno = lineno; 05265 bool handy_toggle = TRUE ; /* Various uses... */ 05266 05267 switch (tok) { 05268 case STRING: /* Double-Quote ( " ) string */ 05269 case PSTRING: /* Dot-Quote ( ." ) string */ 05270 case ABORTTXT: /* ABORT", even if not enabled */ 05271 get_string( FALSE); /* Don't truncate; ignoring anyway */ 05272 /* Will handle multi-line warnings, etc. */ 05273 break; 05274 05275 case SSTRING: /* Ess-Quote ( s" ) string */ 05276 handy_toggle = FALSE; 05277 case PBSTRING: /* Dot-Paren .( string */ 05278 if (*pc++=='\n') lineno++; 05279 { 05280 unsigned int strt_lineno = lineno; 05281 get_until( handy_toggle ? ')' : '"' ); 05282 string_err_check( handy_toggle, sav_lineno, strt_lineno ); 05283 } 05284 break; 05285 05286 default: 05287 /* IBM-style Locals, under control of a switch */ 05288 if ( ibm_locals ) 05289 { 05290 bool found_it = TRUE; 05291 switch (tok) { 05292 case CURLY_BRACE: 05293 declare_locals( TRUE); 05294 break; 05295 case DASH_ARROW: 05296 get_word(); 05297 break; 05298 default: 05299 found_it = FALSE; 05300 } 05301 if ( found_it ) break; 05302 } 05303 05304 tokenization_error ( FATAL, "Program Error. " 05305 "Unimplemented skip-string word '%s'\n", strupr(statbuf) ); 05306 } 05307 } 05308 05309 /* ************************************************************************** 05310 * 05311 * Function name: process_remark 05312 * Synopsis: The active function for remarks (backslash-space) 05313 * and comments (enclosed within parens) 05314 * 05315 * Associated FORTH word(s): \ ( 05316 * 05317 * Inputs: 05318 * Parameters: 05319 * TIC entry "parameter field", init'd to delimiter character. 05320 * 05321 * Outputs: 05322 * Returned Value: NONE 05323 * 05324 * Error Detection: 05325 * Warning if end-of-file encountered before delimiter. 05326 * Warning if multi-line parentheses-delimited comment. 05327 * 05328 * Process Explanation: 05329 * Skip until the delimiter. 05330 * If end-of-file was encountered, issue Warning. 05331 * Otherwise, and if delimiter was not new-line, 05332 * check for multi-line with Warning. 05333 * 05334 **************************************************************************** */ 05335 05336 void process_remark( tic_param_t pfield ) 05337 { 05338 char until_char = (char)pfield.deflt_elem ; 05339 unsigned int start_lineno = lineno; 05340 05341 #ifdef DEBUG_SCANNER 05342 05343 get_until(until_char); 05344 printf ("%s:%d: debug: stack diagram: %s)\n", 05345 iname, lineno, statbuf); 05346 #else 05347 05348 if ( skip_until( until_char) ) 05349 { 05350 if ( until_char == '\n' ) 05351 { 05352 /* Don't need any saved line number here ... */ 05353 tokenization_error ( WARNING, 05354 "Unterminated remark.\n"); 05355 }else{ 05356 warn_unterm( WARNING, "comment", start_lineno); 05357 } 05358 }else{ 05359 if ( until_char != '\n' ) 05360 { 05361 pc++; 05362 warn_if_multiline( "comment", start_lineno); 05363 } 05364 } 05365 #endif /* DEBUG_SCANNER */ 05366 } 05367 05368 05369 /* ************************************************************************** 05370 * 05371 * Function name: filter_comments 05372 * Synopsis: Process remarks and comments in special conditions 05373 * 05374 * Inputs: 05375 * Parameters: 05376 * inword Current word just parsed 05377 * 05378 * Outputs: 05379 * Returned Value: TRUE if Current word is a Comment-starter. 05380 * Comment will be processed 05381 * 05382 * Process Explanation: 05383 * We want to be able to recognize any alias the user may have 05384 * defined to a comment-delimiter, in whatever applicable 05385 * vocabulary it might be. 05386 * The active-function of any such alias will, of necessity, be 05387 * the process_remark() routine, defined just above. 05388 * We will search for the TIC-entry of the given word; if we don't 05389 * find it, it's not a comment-delimiter. If we do find it, 05390 * and it is one, we invoke its active-function and return TRUE. 05391 * We also want to permit the "allow-multiline-comments" directive 05392 * to be processed in the context that calls this routine, so 05393 * we will check for that condition, too. 05394 * 05395 **************************************************************************** */ 05396 05397 bool filter_comments( u8 *inword) 05398 { 05399 bool retval = FALSE; 05400 tic_hdr_t *found = lookup_word( inword, NULL, NULL ); 05401 05402 if ( found != NULL ) 05403 { 05404 if ( found->funct == process_remark ) 05405 { 05406 found->funct( found->pfield); 05407 retval = TRUE; 05408 }else{ 05409 /* Permit the "allow-multiline-comments" directive */ 05410 if ( found->funct == handle_internal ) 05411 { 05412 if ( found->pfield.fw_token == ALLOW_MULTI_LINE ) 05413 { 05414 /* Make sure any intended side-effects occur... */ 05415 found->funct( found->pfield); 05416 retval = TRUE; 05417 } 05418 } 05419 } 05420 } 05421 return ( retval ); 05422 } 05423 05424 05425 /* ************************************************************************** 05426 * 05427 * Function name: tokenize_one_word 05428 * Synopsis: Tokenize the currently-obtained word 05429 * along with whatever it consumes. 05430 * 05431 * Inputs: 05432 * Parameters: 05433 * wlen Length of symbol just retrieved from the input stream 05434 * This is not really used here any more; it's 05435 * left over from an earlier implementation. 05436 * Global Variables: 05437 * statbuf The symbol (word) just retrieved from input stream. 05438 * in_tokz_esc TRUE if "Tokenizer-Escape" mode is in effect; a 05439 * different set of vocabularies from "Normal" 05440 * mode will be checked (along with those that 05441 * are common to both modes). 05442 * ibm_locals Controls whether to check for IBM-style Locals; 05443 * set by means of a command-line switch. 05444 * 05445 * Outputs: 05446 * Returned Value: NONE 05447 * Global Variables: 05448 * statbuf May be incremented 05449 * in_tokz_esc May be set if the word just retrieved is 05450 * the tokenizer[ directive. 05451 * tic_found 05452 * 05453 * Error Detection: 05454 * If the word could neither be identified nor processed as a number, 05455 * that is an ERROR; pass it to tokenized_word_error for a 05456 * message. 05457 * 05458 * Process Explanation: 05459 * Look for the word in each of the various lists and vocabularies 05460 * in which it might be found, as appropriate to the current 05461 * state of activity. 05462 * If found, process it accordingly. 05463 * If not found, try to process it as a number. 05464 * If cannot process it as a number, declare an error. 05465 * 05466 * Revision History: 05467 * Updated Tue, 10 Jan 2006 by David L. Paktor 05468 * Convert to tic_hdr_t type vocabularies. 05469 * Updated Mon, 03 Apr 2006 by David L. Paktor 05470 * Replaced bulky "Normal"-vs-"Escape" block with a call 05471 * to lookup_word . Attend to a small but important 05472 * side-effect of the "handle_<vocab>" routines that 05473 * feeds directly into the protection against self- 05474 * -recursion in a user-defined Macro: Set the global 05475 * variable tic_found to the entry, just before we 05476 * execute it, and we're good to go... 05477 * 05478 * Extraneous Remarks: 05479 * We trade off the strict rules of structure for simplicity 05480 * of coding. 05481 * 05482 **************************************************************************** */ 05483 05484 void tokenize_one_word( signed long wlen ) 05485 { 05486 05487 /* The shared lookup routine now handles everything. */ 05488 tic_hdr_t *found = lookup_word( statbuf, NULL, NULL ); 05489 05490 if ( found != NULL ) 05491 { 05492 tic_found = found; 05493 found->funct( found->pfield); 05494 return ; 05495 } 05496 05497 /* It's not a word in any of our current contexts. 05498 * Is it a number? 05499 */ 05500 if ( handle_number() ) 05501 { 05502 return ; 05503 } 05504 05505 /* Could not identify - give a shout. */ 05506 tokenized_word_error( statbuf ); 05507 } 05508 05509 /* ************************************************************************** 05510 * 05511 * Function name: tokenize 05512 * Synopsis: Tokenize the current input stream. 05513 * May be called recursively for macros and such. 05514 * 05515 * Revision History: 05516 * Updated Thu, 24 Mar 2005 by David L. Paktor 05517 * Factor-out comment-filtration; apply to gather_locals 05518 * Factor-out tokenizing a single word (for conditionals) 05519 * Separate actions of "Tokenizer-Escape" mode. 05520 * 05521 **************************************************************************** */ 05522 05523 void tokenize(void) 05524 { 05525 signed long wlen = 0; 05526 05527 while ( wlen >= 0 ) 05528 { 05529 wlen = get_word(); 05530 if ( wlen > 0 ) 05531 { 05532 tokenize_one_word( wlen ); 05533 } 05534 } 05535 } 05536