/* aslex.c */
/*
* Copyright (C) 1989-2010 Alan R. Baldwin
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*
* Alan R. Baldwin
* 721 Berkeley St.
* Kent, Ohio 44240
*
*/
/*
* 28-Oct-97 JLH bug in getst(): sign extend on ~(SPACE|ILL)
* causes infinite loop
*/
/*
* Extensions: P. Felber, M. Hope
*/
#include "dbuf_string.h"
#include "asxxxx.h"
/*)Module aslex.c
*
* The module aslex.c includes the general lexical
* analysis routines for the assembler.
*
* aslex.c contains the following functions:
* int comma()
* char endline()
* int get()
* VOID getdstr()
* int getdlm()
* VOID getid()
* int getmap()
* int getnb()
* int getlnm()
* VOID getst()
* int more()
* int nxtline()
* VOID unget()
*
* aslex.c contains no file-scope local/static variables; nxtline()
* keeps its two line buffers in function-scope static variables
*/
/*)Function VOID getid(id,c)
*
* char * id a pointer to a string of
* maximum length NCPS-1
* int c mode flag
* >=0 this is first character to
* copy to the string buffer
* <0 skip white space, first
* character must be a LETTER
*
* The function getid() scans the current assembler-source text line
* from the current position copying the next LETTER | DIGIT string
* into the external string buffer id[]. The string ends when a non
* LETTER or DIGIT character is found. The maximum number of characters
* copied is NCPS-1. If the input string is larger than NCPS-1
* characters then the string is truncated. The string is always
* NULL terminated. If the mode argument (c) is >=0 then (c) is
* the first character copied to the string buffer, if (c) is <0
* then intervening white space (SPACES and TABS) are skipped and
* the first character found must be a LETTER else a 'q' error
* terminates the parse of this assembler-source text line.
*
* local variables:
* char * p pointer to external string buffer
* int c current character value
*
* global variables:
* char ctype[] a character array which defines the
* type of character being processed.
* This index is the character
* being processed.
*
* called functions:
* int get() aslex.c
* int getnb() aslex.c
* VOID unget() aslex.c
* VOID qerr() assubr.c
*
* side effects:
* Use of getnb(), get(), and unget() updates the
* global pointer ip, the position in the current
* assembler-source text line.
*/
VOID
getid(char *id, int c)
{
    char *dst;
    char *limit;

    /*
     * Copy the next LETTER|DIGIT run from the source line into id[].
     * A negative c means "skip blanks first"; the first non-blank
     * character must then be a LETTER or a 'q' error is raised.
     */
    if (c < 0) {
        c = getnb();
        if (!(ctype[c] & LETTER)) {
            qerr();
        }
    }

    dst = id;
    limit = &id[NCPS-1];    /* last slot is reserved for the terminator */

    for (;;) {
        /* Characters beyond NCPS-1 are consumed but not stored. */
        if (dst < limit) {
            *dst++ = c;
        }
        c = get();
        if ((ctype[c] & (LETTER|DIGIT)) == 0) {
            break;
        }
    }

    /* Give back the character that ended the identifier. */
    unget(c);
    *dst = 0;
}
/*)Function VOID getst(id,c)
*
* char * id a pointer to a string of
* maximum length NCPS-1
* int c mode flag
* >=0 this is first character to
* copy to the string buffer
* <0 skip white space, first
* character must be a LETTER
*
* The function getst() scans the current assembler-source text line
* from the current position copying the next character string into
* the external string buffer (id). The string ends when a SPACE or
* ILL character is found. The maximum number of characters copied is
* NCPS-1. If the input string is larger than NCPS-1 characters then
* the string is truncated. The string is always NULL terminated.
* If the mode argument (c) is >=0 then (c) is the first character
* copied to the string buffer, if (c) is <0 then intervening white
* space (SPACES and TABS) are skipped and the first character found
* must be a LETTER else a 'q' error terminates the parse of this
* assembler-source text line.
*
* local variables:
* char * p pointer to external string buffer
* int c current character value
*
* global variables:
* char ctype[] a character array which defines the
* type of character being processed.
* This index is the character
* being processed.
*
* called functions:
* int get() aslex.c
* int getnb() aslex.c
* VOID unget() aslex.c
* VOID qerr() assubr.c
*
* side effects:
* use of getnb(), get(), and unget() updates the
* global pointer ip, the position in the current
* assembler-source text line.
*/
VOID
getst(char *id, int c)
{
    char *dst;
    char *limit;

    /*
     * Copy the next character string from the source line into id[].
     * A negative c means "skip blanks first"; the first non-blank
     * character must then be a LETTER or a 'q' error is raised.
     */
    if (c < 0) {
        c = getnb();
        if (!(ctype[c] & LETTER)) {
            qerr();
        }
    }

    dst = id;
    limit = &id[NCPS-1];    /* last slot is reserved for the terminator */

    for (;;) {
        /* Characters beyond NCPS-1 are consumed but not stored. */
        if (dst < limit) {
            *dst++ = c;
        }
        c = get();
        /*
         * Keep going while the character's ctype entry has any bit set
         * other than SPACE or ILL.  The 0xFF mask limits the test to
         * the low eight bits of the complemented mask.
         */
        if ((ctype[c] & ~(SPACE|ILL) & 0xFF) == 0) {
            break;
        }
    }

    /* Give back the character that ended the string. */
    unget(c);
    *dst = 0;
}
/*)Function VOID getdstr(str, slen)
*
* char * str character array to return string in
* int slen character array length
*
* The function getdstr() returns the character string
* within delimiters. If no delimiting character
* is found a 'q' error is generated.
*
* local variables:
* int c current character from
* assembler-source text line
* int d the delimiting character
*
* global variables:
* none
*
* called functions:
* int get() aslex.c
* int getdlm() aslex.c
* VOID qerr() assubr.c
*
* side effects:
* Returns the character string delimited by the
* character returned from getdlm(). SPACEs and
* TABs before the delimited string are skipped.
* A 'q' error is generated if no delimited string
* is found or the input line terminates unexpectedly.
*/
VOID
getdstr(char *str, int slen)
{
    char *p;
    char *end;
    int c, d;

    /*
     * Copy the text found between a pair of delimiter characters into
     * str[], storing at most slen-1 characters plus a terminating NUL.
     * The delimiter is whatever getdlm() returns.
     *
     * end marks the slot reserved for the terminator.  When slen is
     * not positive there is no room at all, so end == str and nothing
     * is ever written through p.
     */
    end = (slen > 0) ? &str[slen-1] : str;

    d = getdlm();
    p = str;
    while ((c = get()) != d) {
        /* Line ended before the closing delimiter was seen. */
        if (c == '\0') {
            qerr();
        }
        if (p < end) {
            *p++ = c;
        } else {
            /* Buffer full: stop scanning; the character in c is dropped. */
            break;
        }
    }
    if (slen > 0) {
        *p = 0;
    }
}
/*)Function int getdlm()
*
* The function getdlm() returns the delimiter character
* or if the end of the line is encountered a 'q' error
* is generated.
*
* local variables:
* int c current character from
* assembler-source text line
*
* global variables:
* none
*
* called functions:
* int get() aslex.c
* int getnb() aslex.c
* int more() aslex.c
* VOID qerr() assubr.c
*
* side effects:
* scans ip to the first non 'SPACE' or 'TAB' character
* and returns that character or the first character
* following a ^ character as the delimiting character.
* The end of the text line or the beginning of a
* comment causes a 'q' error.
*/
int
getdlm(void)
{
    int c;

    /*
     * Return the delimiter character for a delimited string.
     * more() reports whether anything other than end of line or a
     * ';' comment follows the blanks; if nothing does, c stays NUL
     * and a 'q' error is raised below.
     */
    c = '\0';
    if (more()) {
        c = getnb();
        /* A leading '^' means the delimiter is the character after it. */
        if (c == '^') {
            c = get();
        }
    }
    if (c == '\0') {
        qerr();
    }
    return (c);
}
/*)Function int getnb()
*
* The function getnb() scans the current assembler-source
* text line returning the first character not a SPACE or TAB.
*
* local variables:
* int c current character from
* assembler-source text line
*
* global variables:
* none
*
* called functions:
* int get() aslex.c
*
* side effects:
* use of get() updates the global pointer ip, the position
* in the current assembler-source text line
*/
int
getnb(void)
{
    int ch;

    /* Read characters until one is neither a space nor a tab. */
    do {
        ch = get();
    } while (ch == ' ' || ch == '\t');

    return (ch);
}
/*)Function int get()
*
* The function get() returns the next character in the
* assembler-source text line, at the end of the line a
* NULL character is returned.
*
* local variables:
* int c current character from
* assembler-source text line
*
* global variables:
* char * ip pointer into the current
* assembler-source text line
*
* called functions:
* none
*
* side effects:
* updates ip to the next character position in the
* assembler-source text line. If ip is at the end of the
* line, ip is not updated.
*/
int
get(void)
{
    int ch;

    /*
     * Fetch the character at ip.  ip only moves forward when that
     * character is not the terminating NUL, so repeated calls at the
     * end of the line keep returning 0.
     */
    ch = *ip;
    if (ch != 0) {
        ip++;
    }

    /* Only the low seven bits are handed back to the caller. */
    return (ch & 0x007F);
}
/*)Function VOID unget(c)
*
* int c value of last character read from
* assembler-source text line
*
* If (c) is not a NULL character then the global pointer ip
* is updated to point to the preceding character in the
* assembler-source text line.
*
* NOTE: This function does not push the character (c)
* back into the assembler-source text line, only
* the pointer ip is changed.
*
* local variables:
* int c last character read from
* assembler-source text line
*
* global variables:
* char * ip position into the current
* assembler-source text line
*
* called functions:
* none
*
* side effects:
* ip decremented by 1 character position
*/
VOID
unget(int c)
{
    /*
     * Step ip back one position.  Nothing happens for a NUL (get()
     * does not advance past the end of the line) or when ip is
     * already at the start of the line buffer ib.
     */
    if (c != 0 && ip != ib) {
        ip--;
    }
}
/*)Function int getmap(d)
*
* int d value to compare with the
* assembler-source text line character
*
* The function getmap() converts the 'C' style characters \b, \f,
* \n, \r, and \t to their equivalent ascii values and also
* converts 'C' style octal constants '\123' to their equivalent
* numeric values. If the first character is equivalent to (d) then
* a (-1) is returned, if the end of the line is detected then
* a 'q' error terminates the parse for this line, or if the first
* character is not a \ then the character value is returned.
*
* local variables:
* int c value of character from the
* assembler-source text line
* int n looping counter
* int v current value of numeric conversion
*
* global variables:
* none
*
* called functions:
* int get() aslex.c
* VOID qerr() assubr.c
*
* side effects:
* use of get() updates the global pointer ip the position
* in the current assembler-source text line
*/
int
getmap(int d)
{
    int ch, digits, value;

    /*
     * Read one (possibly escaped) character from the source line.
     * Returns -1 when the character equals the delimiter d.
     */
    ch = get();
    if (ch == '\0') {
        qerr();
    }
    if (ch == d) {
        return (-1);
    }
    if (ch != '\\') {
        return (ch);
    }

    /* A backslash was seen: look at the character that follows it. */
    ch = get();

    /* Octal constant: one to three digits in the range '0'..'7'. */
    if (ch >= '0' && ch <= '7') {
        value = 0;
        for (digits = 0; digits < 3 && ch >= '0' && ch <= '7'; digits++) {
            value = (value << 3) + ch - '0';
            ch = get();
        }
        /* ch is the first character after the constant; give it back. */
        unget(ch);
        return (value);
    }

    switch (ch) {
    case 'b':
        return ('\b');
    case 'f':
        return ('\f');
    case 'n':
        return ('\n');
    case 'r':
        return ('\r');
    case 't':
        return ('\t');
    default:
        /* Not a known escape: return the backslash itself and
         * leave the following character for the next read. */
        unget(ch);
        return ('\\');
    }
}
/*)Function int comma(flag)
*
* int flag when flag is non zero a 'q' error is
* generated if a COMMA is not found.
*
* The function comma() skips SPACEs and TABs and returns
* a '1' if the next character is a COMMA else a '0' is
* returned. If a COMMA is not found and flag is non zero
* then a 'q' error is reported.
*
* local variables:
* int c last character read from
* assembler-source text line
*
* global variables:
* none
*
* called functions:
* int getnb() aslex.c
* VOID qerr() assubr.c
* VOID unget() aslex.c
*
* side effects:
* assembler-source text line pointer updated
*/
int
comma(int flag)
{
    int ch;

    /* Skip blanks and report whether the next character is a comma. */
    ch = getnb();
    if (ch == ',') {
        return(1);
    }

    /*
     * No comma.  With flag set this is a 'q' error; otherwise the
     * character is left in place for the caller.
     */
    if (flag) {
        qerr();
    } else {
        unget(ch);
    }
    return(0);
}
/*)Function int nxtline()
*
* The function nxtline() reads a line of assembler-source text
* from an assembly source text file, include file, or macro.
* Lines of text are processed from assembler-source files until
* all files have been read. If an include file is opened then
* lines of text are read from the include file (or nested
* include file) until the end of the include file is found.
* The input text line is transferred into the global string
* ib[] and converted to a NULL terminated string. The string
* is then copied into the global string ic[] which is used
* for internal processing by the assembler. The function
* nxtline() returns a (1) after successfully reading
* a line, or a (0) if all files have been read.
*
* local variables:
* size_t len string length
* struct asmf *asmt temporary pointer to the processing structure
*
* global variables:
* char afn[] afile() constructed filespec
* int afp afile constructed path length
* asmf * asmc pointer to current assembler file structure
* asmf * asmi pointer to a queued include file structure
* asmf * asmq pointer to a queued macro structure
* char * ib string buffer containing
* assembler-source text line for processing
* char * ic string buffer containing
* assembler-source text line for listing
* int asmline source file line number
* int incfil current include file count
* int incline include file line number
* int lnlist LIST-NLIST state
* int mcrline macro line number
* int srcline current source line number
* int uflag -u, disable .list/.nlist processing
*
* called functions:
* int dbuf_init()
* int dbuf_set_length()
* int dbuf_getline()
* const char * dbuf_c_str()
* int dbuf_append_str()
* int fclose() c-library
* char * fgetm() asmcro.c
* char * strcpy() c_library
*
* side effects:
* include file will be closed at detection of end of file.
* the next sequential source file may be selected.
* The current file specification afn[] and the path
* length afp may be changed.
* The respective line counter will be updated.
*
* --------------------------------------------------------------
*
* How the assembler sequences the command line assembler
* source files, include files, and macros is shown in a
* simplified manner in the following.
*
* main[asmain] sequences the command line files by creating
* a linked list of asmf structures, one for each file.
*
* asmf structures:
* ------------- ------------- -------------
* | File 1 | | File 2 | | File N |
* ------ | ------| | ------| | ------|
* | asmp | -->| | next | --> | | next | --> ... --> | | NULL |
* ------ ------------- ------------- -------------
*
* At the beginning of each assembler pass set asmc = asmp
* and process the files in sequence.
*
* If the source file invokes the .include directive to process a
* file then a new asmf structure is prepended to the asmc structure
* currently being processed. At the end of the include file the
* processing resumes at the point the asmc structure was interrupted.
* This is shown in the following:
*
* -------------
* | Incl File 1 |
* | ------|
* | | next |
* -------------
* |
* asmf structures: |
* V
* ------------- ------------- -------------
* | File 1 | | File 2 | | File N |
* ------ | ------| | ------| | ------|
* | asmp | -->| | next | --> | | next | --> ... --> | | NULL |
* ------ ------------- ------------- -------------
*
* At the .include point link the asmi structure to asmc
* and then set asmc = asmi (the include file asmf structure).
*
* If a source file invokes a macro then a new asmf structure is
* prepended to the asmc structure currently being processed. At the
* end of the macro the processing resumes at the point the asmc
* structure was interrupted.
* This is shown in the following:
*
* ------------- -------------
* | Incl File 1 | | Macro |
* | ------| | ------|
* | | next | | | next |
* ------------- -------------
* | |
* asmf structures: | |
* V V
* ------------- ------------- -------------
* | File 1 | | File 2 | | File N |
* ------ | ------| | ------| | ------|
* | asmp | -->| | next | --> | | next | --> ... --> | | NULL |
* ------ ------------- ------------- -------------
*
* At the macro point link the asmq structure to asmc
* and then set asmc = asmq (the macro asmf structure).
*
* Note that both include files and macros can be nested.
* Macros may be invoked within include files and include
* files can be invoked within macros.
*
* Include files are opened, read, and closed on each pass
* of the assembler.
*
* Macros are recreated during each pass of the assembler.
*/
/*
 * Append count zero bytes to dbuf, in chunks taken from a zero-filled
 * array so that dbuf_append() is never given a source shorter than
 * the length passed with it.
 */
static void
nxtline_zero_fill(struct dbuf_s *dbuf, size_t count)
{
    static const char zeros[64] = { 0 };

    while (count > 0) {
        size_t chunk = (count < sizeof zeros) ? count : sizeof zeros;

        dbuf_append(dbuf, zeros, chunk);
        count -= chunk;
    }
}

int
nxtline(void)
{
    /* Line buffers persist across calls; they are initialised once. */
    static struct dbuf_s dbuf_ib;
    static struct dbuf_s dbuf_ic;
    size_t len = 0;
    struct asmf *asmt;

    /*
     * Read the next line of input from the current assembler file,
     * include file or macro into ib, and copy it to ic.
     * Returns 1 when a line was read, 0 when asmc has run off the
     * end of the input list.
     */
    if (!dbuf_is_initialized (&dbuf_ib))
        dbuf_init (&dbuf_ib, 1024);
    if (!dbuf_is_initialized (&dbuf_ic))
        dbuf_init (&dbuf_ic, 1024);
    dbuf_set_length (&dbuf_ib, 0);
    dbuf_set_length (&dbuf_ic, 0);

loop:
    if (asmc == NULL) return(0);

    /*
     * Insert Include File
     */
    if (asmi != NULL) {
        asmc = asmi;
        asmi = NULL;
        incline = 0;
    }
    /*
     * Insert Queued Macro
     */
    if (asmq != NULL) {
        asmc = asmq;
        asmq = NULL;
        mcrline = 0;
    }

    switch(asmc->objtyp) {
    case T_ASM:
        /* A zero length from dbuf_getline() is treated as end of file. */
        if ((len = dbuf_getline (&dbuf_ib, asmc->fp)) == 0) {
            /* Conditional nesting levels must match those saved in asmc. */
            if ((asmc->flevel != flevel) || (asmc->tlevel != tlevel)) {
                err('i');
                fprintf(stderr, "?ASxxxx-Error- at end of assembler file\n");
                fprintf(stderr, " %s\n", geterr('i'));
            }
            flevel = asmc->flevel;
            tlevel = asmc->tlevel;
            lnlist = asmc->lnlist;
            asmc = asmc->next;
            if (asmc != NULL) {
                asmline = 0;
            }
            if ((lnlist & LIST_PAG) || (uflag == 1)) {
                lop = NLPP;
            }
            goto loop;
        } else {
            /* First line of this file: publish its file specification. */
            if (asmline++ == 0) {
                strcpy(afn, asmc->afn);
                afp = asmc->afp;
            }
            srcline = asmline;
        }
        break;

    case T_INCL:
        if ((len = dbuf_getline (&dbuf_ib, asmc->fp)) == 0) {
            fclose(asmc->fp);
            incfil -= 1;
            if ((asmc->flevel != flevel) || (asmc->tlevel != tlevel)) {
                err('i');
                fprintf(stderr, "?ASxxxx-Error- at end of include file\n");
                fprintf(stderr, " %s\n", geterr('i'));
            }
            /* Restore the state saved in the include's asmf entry. */
            srcline = asmc->line;
            flevel = asmc->flevel;
            tlevel = asmc->tlevel;
            lnlist = asmc->lnlist;
            asmc = asmc->next;
            /* Hand the restored line number to the resumed input's counter. */
            switch (asmc->objtyp) {
            default:
            case T_ASM: asmline = srcline; break;
            case T_INCL: incline = srcline; break;
            case T_MACRO: mcrline = srcline; break;
            }
            /*
             * Scan for parent file
             */
            asmt = asmc;
            while (asmt != NULL) {
                if (asmt->objtyp != T_MACRO) {
                    strcpy(afn, asmt->afn);
                    afp = asmt->afp;
                    break;
                }
                asmt = asmt->next;
            }
            if ((lnlist & LIST_PAG) || (uflag == 1)) {
                lop = NLPP;
            }
            goto loop;
        } else {
            /* First line of this include: publish its file specification. */
            if (incline++ == 0) {
                strcpy(afn, asmc->afn);
                afp = asmc->afp;
            }
            srcline = incline;
        }
        break;

    case T_MACRO:
        /*
         * Grow the buffer's recorded length to alloc - 1 before fgetm()
         * writes into it directly.
         * NOTE(review): the previous code passed the 2-byte literal "\0"
         * with a length of alloc - 1; assuming dbuf_append() copies that
         * many bytes from its source, that read past the literal.  The
         * helper appends the same number of bytes from real zero storage.
         */
        nxtline_zero_fill(&dbuf_ib, dbuf_ib.alloc - 1);
        ib = (char *)dbuf_c_str (&dbuf_ib);
        ib = fgetm(ib, dbuf_ib.alloc - 1, asmc->fp);
        if (ib == NULL) {
            /* fgetm() returned NULL: this macro has no more lines. */
            dbuf_set_length(&dbuf_ib, 0);
            mcrfil -= 1;
            srcline = asmc->line;
            flevel = asmc->flevel;
            tlevel = asmc->tlevel;
            lnlist = asmc->lnlist;
            asmc = asmc->next;
            switch (asmc->objtyp) {
            default:
            case T_ASM: asmline = srcline; break;
            case T_INCL: incline = srcline; break;
            case T_MACRO: mcrline = srcline; break;
            }
            goto loop;
        } else {
            /* Trim the recorded length back to the text actually read. */
            len = strlen(ib);
            dbuf_set_length(&dbuf_ib, len);
            mcrline++;
            srcline = mcrline;
        }
        break;

    default:
        fprintf(stderr, "?ASxxxx-Internal-nxtline(objtyp)-Error.\n\n");
        asexit(ER_FATAL);
        break;
    }

    ib = (char *)dbuf_c_str (&dbuf_ib);

    /* remove the trailing NL (and a CR immediately before it) */
    if (len > 0 && '\n' == ib[len - 1])
    {
        --len;
        if (len > 0 && '\r' == ib[len - 1])
            --len;
        dbuf_set_length (&dbuf_ib, len);
        ib = (char *)dbuf_c_str (&dbuf_ib);
    }

    /* ic receives a copy of the processed line. */
    dbuf_append_str (&dbuf_ic, ib);
    ic = (char *)dbuf_c_str (&dbuf_ic);
    return(1);
}
/*)Function: int getlnm()
*
* The function getlnm() returns the line number of the
* originating assembler or include file.
*
* local variables:
* struct asmf *asmt temporary pointer to the processing structure
*
* global variables:
* struct asmf *asmc pointer to the current input processing structure
* int asmline line number in current assembler file
* int line line number
*
* functions called:
* none
*
* side effects:
* Sets line to the source file line number.
*/
int
getlnm(void)
{
    struct asmf *asmt;

    /*
     * Set the global line to the line number of the originating
     * assembler or include file and return it.
     */
    line = srcline;

    /*
     * asmc is NULL once nxtline() has stepped past the last input
     * file; in that case srcline is reported as is.
     */
    if (asmc != NULL && asmc->objtyp == T_MACRO) {
        /*
         * Inside a macro: walk outwards to the nearest enclosing
         * assembler or include file and report its line number.
         */
        for (asmt = asmc->next; asmt != NULL; asmt = asmt->next) {
            if (asmt->objtyp == T_ASM) {
                return(line = asmline);
            }
            if (asmt->objtyp == T_INCL) {
                return(line = asmt->line);
            }
        }
    }
    return(line);
}
/*)Function int more()
*
* The function more() scans the assembler-source text line
* skipping white space (SPACES and TABS) and returns a (0)
* if the end of the line or a comment delimiter (;) is found,
* or a (1) if there are additional characters in the line.
*
* local variables:
* int c next character from the
* assembler-source text line
*
* global variables:
* none
*
* called functions:
* int getnb() aslex.c
* VOID unget() aslex.c
*
* side effects:
* use of getnb() and unget() updates the global pointer ip
* the position in the current assembler-source text line
*/
int
more(void)
{
    int ch;

    /* Peek at the next non-blank character without consuming it. */
    ch = getnb();
    unget(ch);

    /* End of line or the start of a ';' comment means nothing more. */
    if (ch == '\0' || ch == ';') {
        return(0);
    }
    return(1);
}
/*)Function char endline()
*
* The function endline() scans the assembler-source text line
* skipping white space (SPACES and TABS) and returns the next
* character or a (0) if the end of the line is found or a
* comment delimiter (;) is found.
*
* local variables:
* int c next character from the
* assembler-source text line
*
* global variables:
* none
*
* called functions:
* int getnb() aslex.c
*
* side effects:
* use of getnb() updates the global pointer ip the
* position in the current assembler-source text line
*/
char
endline(void)
{
    int ch;

    /* Consume blanks and the next character. */
    ch = getnb();

    /* End of line and the ';' comment delimiter are both reported as 0. */
    if (ch == '\0' || ch == ';') {
        return(0);
    }
    return(ch);
}