484 lines
11 KiB
C
484 lines
11 KiB
C
#include "token.h"
|
|
#include "parser.h"
|
|
#include <ctype.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stddef.h>
|
|
#include <stdarg.h>
|
|
|
|
static int verbose, extra_verbose;
|
|
static unsigned int line, start_column, column, pos, start_pos;
|
|
static const char *path;
|
|
static struct token_list tokens;
|
|
|
|
static void logv(const char *const format, ...)
|
|
{
|
|
if (verbose || extra_verbose)
|
|
{
|
|
va_list ap;
|
|
printf("[v] ");
|
|
va_start(ap, format);
|
|
vprintf(format, ap);
|
|
printf("\n");
|
|
va_end(ap);
|
|
}
|
|
}
|
|
|
|
static void logvv(const char *const format, ...)
|
|
{
|
|
if (extra_verbose && format)
|
|
{
|
|
va_list ap;
|
|
printf("[vv] ");
|
|
va_start(ap, format);
|
|
vprintf(format, ap);
|
|
printf("\n");
|
|
va_end(ap);
|
|
}
|
|
}
|
|
|
|
static void cleanup(void)
|
|
{
|
|
if (tokens.list)
|
|
free(tokens.list);
|
|
}
|
|
|
|
static void fatal_error(const char *const format, ...)
|
|
{
|
|
if (format)
|
|
{
|
|
va_list ap;
|
|
fprintf(stderr, "[error]: ");
|
|
|
|
if (path)
|
|
fprintf(stderr, "%s: ", path);
|
|
|
|
va_start(ap, format);
|
|
vfprintf(stderr, format, ap);
|
|
fprintf(stderr, "\n");
|
|
va_end(ap);
|
|
cleanup();
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
static char *read_file(const char *const path)
|
|
{
|
|
FILE *const f = fopen(path, "rb");
|
|
|
|
if (f)
|
|
{
|
|
fseek(f, 0, SEEK_END);
|
|
|
|
{
|
|
const size_t sz = ftell(f);
|
|
char *const buf = malloc((sz + 1) * sizeof *buf);
|
|
|
|
rewind(f);
|
|
|
|
if (buf)
|
|
{
|
|
const size_t read_bytes = fread(buf,
|
|
sizeof *buf, sz, f);
|
|
|
|
fclose(f);
|
|
|
|
if (read_bytes == sz)
|
|
{
|
|
/* File contents were read succesfully. */
|
|
logv("File %s was opened successfully", path);
|
|
|
|
buf[sz] = '\0';
|
|
line = 1;
|
|
|
|
return buf;
|
|
}
|
|
else
|
|
fatal_error("Could not read %s succesfully. "
|
|
"Only %d/%d bytes could be read",
|
|
path, read_bytes, sz);
|
|
}
|
|
else
|
|
fatal_error("Cannot allocate buffer for file data");
|
|
|
|
fclose(f);
|
|
}
|
|
}
|
|
else
|
|
fatal_error("Input file %s could not be opened", path);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static int alloc_token(const enum token_id id,
|
|
const char *const value)
|
|
{
|
|
if (!tokens.list)
|
|
tokens.list = malloc(sizeof *tokens.list);
|
|
else
|
|
tokens.list = realloc(tokens.list, (tokens.n + 1)
|
|
* sizeof *tokens.list);
|
|
|
|
if (tokens.list)
|
|
{
|
|
struct token *const new_t = &tokens.list[tokens.n++];
|
|
|
|
switch (id)
|
|
{
|
|
case TOKEN_HEX:
|
|
/* Fall through. */
|
|
case TOKEN_FLOAT:
|
|
/* Fall through. */
|
|
case TOKEN_SYMBOL:
|
|
/* Fall through. */
|
|
case TOKEN_INT:
|
|
{
|
|
char *const buf = malloc((strlen(value) + 1) * sizeof *buf);
|
|
|
|
if (buf)
|
|
{
|
|
strcpy(buf, value);
|
|
new_t->value = buf;
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
fatal_error("could not allocate space for token value");
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
default:
|
|
new_t->value = "";
|
|
break;
|
|
}
|
|
|
|
new_t->id = id;
|
|
new_t->line = line;
|
|
new_t->column = start_column;
|
|
new_t->pos = start_pos;
|
|
new_t->file = path;
|
|
start_column = 0; start_pos = 0;
|
|
logvv("id=%s, value=\"%s\", line=%u, col=%u, pos=%u",
|
|
token_name(new_t->id), new_t->value, new_t->line,
|
|
new_t->column, new_t->pos);
|
|
return 0;
|
|
|
|
}
|
|
else
|
|
fatal_error("could not allocate space for new token");
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int is_integer(const char *const symbol)
|
|
{
|
|
const char *s;
|
|
|
|
for (s = symbol; *s; s++)
|
|
{
|
|
if (*s < '0' || *s > '9')
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int is_hex(const char *const symbol)
|
|
{
|
|
const size_t len = strlen(symbol);
|
|
const size_t minlen = strlen("0x");
|
|
|
|
if (len >= minlen && symbol[0] == '0' &&
|
|
(symbol[1] == 'x' || symbol[1] == 'X'))
|
|
{
|
|
if (len > minlen)
|
|
{
|
|
const char *s;
|
|
|
|
for (s = &symbol[2]; *s; s++)
|
|
{
|
|
if ((*s >= 'a' && *s <= 'f')
|
|
|| (*s >= 'A' && *s <= 'F')
|
|
|| (*s >= '0' && *s <= '9'))
|
|
continue;
|
|
else
|
|
return 0;
|
|
}
|
|
}
|
|
else
|
|
fatal_error("invalid hex number at line %u, column %u",
|
|
line, start_column);
|
|
}
|
|
else
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int is_float(const char *const symbol)
|
|
{
|
|
int count = 0;
|
|
const char *s;
|
|
|
|
for (s = symbol; *s; s++)
|
|
{
|
|
if (count > 1)
|
|
return 0;
|
|
else if (*s < '0' || *s > '9')
|
|
{
|
|
if (*s == '.')
|
|
count++;
|
|
else
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int process_symbol(const char *const symbol)
|
|
{
|
|
if (strlen(symbol))
|
|
{
|
|
static const struct
|
|
{
|
|
enum token_id id;
|
|
const char *value;
|
|
}res[] =
|
|
{
|
|
{TOKEN_LET, "let"}, {TOKEN_IF, "if"}, {TOKEN_WHILE, "while"},
|
|
{TOKEN_FOR, "for"}, {TOKEN_ELSE, "else"}
|
|
};
|
|
|
|
size_t i;
|
|
|
|
for (i = 0; i < sizeof res / sizeof *res; i++)
|
|
if (!strcmp(symbol, res[i].value))
|
|
{
|
|
if (alloc_token(res[i].id, symbol))
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
if ((*symbol >= 'a' && *symbol <= 'z')
|
|
|| (*symbol >= 'A' && *symbol <= 'Z'))
|
|
if (alloc_token(TOKEN_SYMBOL, symbol)) return 1;
|
|
|
|
if (is_integer(symbol))
|
|
{
|
|
if (alloc_token(TOKEN_INT, symbol)) return 1;
|
|
}
|
|
else if (is_hex(symbol))
|
|
{
|
|
if (alloc_token(TOKEN_HEX, symbol)) return 1;
|
|
}
|
|
else if (is_float(symbol))
|
|
if (alloc_token(TOKEN_FLOAT, symbol)) return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int tokenize(const char *const buf)
|
|
{
|
|
static const struct
|
|
{
|
|
enum token_id id;
|
|
const char *str;
|
|
} ops[] =
|
|
{
|
|
{TOKEN_GE, ">="},
|
|
{TOKEN_LE, "<="},
|
|
{TOKEN_EQ, "=="},
|
|
{TOKEN_LT, "<"},
|
|
{TOKEN_GT, ">"},
|
|
{TOKEN_PLUS, "+"},
|
|
{TOKEN_MINUS, "-"},
|
|
{TOKEN_SLASH, "/"},
|
|
{TOKEN_STAR, "*"},
|
|
{TOKEN_DOT, "."},
|
|
{TOKEN_MOV, "="},
|
|
{TOKEN_LP, "("},
|
|
{TOKEN_RP, ")"},
|
|
{TOKEN_LC, "{"},
|
|
{TOKEN_RC, "}"},
|
|
{TOKEN_COMMA, ","}
|
|
};
|
|
|
|
const char *c = buf;
|
|
char symbol[128];
|
|
char *s = symbol;
|
|
int comment = 0;
|
|
column = 1;
|
|
#define REWIND do {s = symbol;} while (0)
|
|
|
|
while (*c)
|
|
{
|
|
start:
|
|
pos++;
|
|
|
|
switch (*c)
|
|
{
|
|
case '#':
|
|
comment = 1;
|
|
goto next;
|
|
|
|
case '\n':
|
|
line++;
|
|
/* Fall through. */
|
|
case '\r':
|
|
/* Support for CR-only. */
|
|
if (*c == '\r' && (*(c + 1) != '\n'))
|
|
line++;
|
|
|
|
comment = 0;
|
|
column = 1;
|
|
/* Fall through. */
|
|
case '\t':
|
|
/* Fall through. */
|
|
case ' ':
|
|
*s = '\0';
|
|
|
|
if (process_symbol(symbol))
|
|
return 1;
|
|
|
|
start_column = 0;
|
|
REWIND;
|
|
goto next;
|
|
|
|
default:
|
|
if (comment)
|
|
goto next;
|
|
|
|
break;
|
|
}
|
|
|
|
{
|
|
size_t i;
|
|
for (i = 0; i < sizeof ops / sizeof *ops; i++)
|
|
{
|
|
const size_t len = strlen(ops[i].str);
|
|
|
|
if (!strncmp(c, ops[i].str, len))
|
|
{
|
|
if (s != symbol)
|
|
{
|
|
*s = '\0';
|
|
|
|
if (ops[i].id == TOKEN_DOT && is_integer(symbol))
|
|
{
|
|
/* Exception: dot is used both as operator and decimal
|
|
* separator according to the context. */
|
|
goto store;
|
|
}
|
|
else if (process_symbol(symbol))
|
|
return 1;
|
|
|
|
REWIND;
|
|
}
|
|
|
|
strcpy(symbol, ops[i].str);
|
|
start_column = column;
|
|
start_pos = pos;
|
|
if (alloc_token(ops[i].id, symbol))
|
|
return 1;
|
|
|
|
REWIND;
|
|
c += len;
|
|
column += len;
|
|
goto start;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!isalnum(*c))
|
|
fatal_error("invalid character '%c' (0x%hhu), line %u, column %u,"
|
|
" pos %u", *c, *c, line, column, pos);
|
|
store:
|
|
if ((size_t)(s - symbol) < sizeof symbol)
|
|
*s++ = *c;
|
|
else
|
|
fatal_error("maximum symbol length (%u bytes) exceeded"
|
|
" on symbol \"%s\", line %u, column %u",
|
|
sizeof symbol, symbol, line, column);
|
|
|
|
if (!start_column)
|
|
start_column = column;
|
|
|
|
if (!start_pos)
|
|
start_pos = pos;
|
|
next:
|
|
if (*c != '\n' && *c != '\r')
|
|
column++;
|
|
|
|
c++;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int exec(const char *const path)
|
|
{
|
|
int ret = 1;
|
|
char *const buf = read_file(path);
|
|
|
|
if (buf && tokenize(buf))
|
|
{
|
|
if (parse(&tokens))
|
|
goto end;
|
|
|
|
ret = 0;
|
|
}
|
|
else
|
|
goto end;
|
|
|
|
end:
|
|
if (buf)
|
|
free(buf);
|
|
|
|
return ret;
|
|
}
|
|
|
|
int main(const int argc, const char *argv[])
|
|
{
|
|
int i;
|
|
|
|
for (i = 1; i < argc; i++)
|
|
{
|
|
const char *const arg = argv[i];
|
|
|
|
if (*arg != '-' && !path)
|
|
path = arg;
|
|
else if (*arg == '-')
|
|
{
|
|
if (!strcmp(arg, "-vv"))
|
|
extra_verbose = 1;
|
|
else if (!strcmp(arg, "-v"))
|
|
verbose = 1;
|
|
else
|
|
{
|
|
fatal_error("unrecognized option %s", arg);
|
|
return EXIT_FAILURE;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
fatal_error("input path already specified (%s)", path);
|
|
return EXIT_FAILURE;
|
|
}
|
|
}
|
|
|
|
if (path)
|
|
{
|
|
if (exec(path)) return EXIT_FAILURE;
|
|
}
|
|
else
|
|
fatal_error("no input file specified");
|
|
|
|
return EXIT_SUCCESS;
|
|
}
|
|
|