diff options
Diffstat (limited to 'lex.c')
| -rw-r--r-- | lex.c | 247 |
1 files changed, 247 insertions, 0 deletions
@@ -0,0 +1,247 @@ +#include "lex.h" +#include "errloc.h" +#include <errno.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +static void free_tk(struct tk *tk) +{ + free(tk->s); +} + +static int invnum(const struct tk *tk, char *end) +{ + if (errno) + errloc(tk, "invalid number: %s (%s)", tk->s, strerror(errno)); + else if (*end) + errloc(tk, "invalid number: %s", tk->s); + + return -1; +} + +static int fintok(struct lex *lex) +{ + struct tk *tk = &lex->tk, *tokens; + const struct tk empty = {.loc = tk->loc}; + size_t ntk = lex->ntok + 1; + char *s; + + if (tk->type == UNDEF) + return 0; + else if (!(s = realloc(tk->s, lex->len + 1))) + { + perror("realloc(3)"); + return -1; + } + + tk->s = s; + tk->s[lex->len] = '\0'; + + switch (tk->type) + { + case UNDEF: + break; + + case ANY: + errloc(tk, "%s: unreachable", __func__); + return -1; + + case LIT: + case ID: + break; + + case NUM: + { + int neg = *tk->s == '-'; + char *end; + + errno = 0; + strtoll(tk->s, &end, 0); + + if (errno || *end) + { + if (neg) + return invnum(tk, end); + + errno = 0; + strtoull(tk->s, &end, 0); + + if (errno || *end) + return invnum(tk, end); + } + } + } + + if (!(tokens = realloc(lex->tokens, ntk * sizeof *tokens))) + { + perror("realloc(3)"); + return -1; + } + + tokens[lex->ntok++] = lex->tk; + lex->tokens = tokens; + lex->tk = empty; + lex->len = 0; + return 0; +} + +static int printable(char c) +{ + return c >= '!' && c <= '~'; +} + +static void invch(const struct lex *l, char c) +{ + if (printable(c)) + errcloc(l, "invalid character: %c", c); + else + errcloc(l, "invalid character: (%#hhx)", c); +} + +static int ch(char c, struct lex *lex) +{ + struct tk *tk = &lex->tk; + char *s; + + switch (tk->type) + { + case UNDEF: + tk->loc = lex->loc; + + if (c == '\"') + { + tk->type = LIT; + return 0; + } + else if (c == '-' || c == '+' + || (c >= '0' && c <= '9')) + tk->type = NUM; + else if (c == '_' + || (c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z')) + tk->type = ID; + else + { + invch(lex, c); + return -1; + } + + break; + + case LIT: + if (c == '\"') + return fintok(lex); + + break; + + case NUM: + if (c == '\"') + { + invch(lex, c); + return -1; + } + + break; + + case ID: + if (c == '\"' || c == '-' || c == '+') + { + invch(lex, c); + return -1; + } + else if (!(c == '_' + || (c >= '0' && c <= '9') + || (c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z'))) + { + invch(lex, c); + return -1; + } + + break; + case ANY: + errcloc(lex, "%s: unreachable", __func__); + return -1; + } + + if (!(s = realloc(tk->s, lex->len + 1))) + { + perror("realloc(3)"); + return -1; + } + + s[lex->len++] = c; + tk->s = s; + return 0; +} + +static int clex(char c, struct lex *lex) +{ + struct tk *tk = &lex->tk; + struct loc *loc = &lex->loc; + + ++loc->col; + + switch (c) + { + case '*': + lex->comment = 1; + return fintok(lex); + + case '\n': + if (tk->type == LIT) + { + errcloc(lex, "unterminated literal"); + return -1; + } + else if (fintok(lex)) + return -1; + + loc->line++; + loc->col = lex->comment = 0; + return 0; + + case ' ': + case '\t': + if (tk->type != LIT) + return lex->comment ? 0 : fintok(lex); + default: + return lex->comment ? 0 : ch(c, lex); + } + + invch(lex, c); + return -1; +} + +void lex_free(struct lex *lex) +{ + for (size_t i = 0; i < lex->ntok; i++) + free_tk(&lex->tokens[i]); + + free(lex->tokens); + free_tk(&lex->tk); +} + +int lex_eof(const struct lex *lex, const struct tk *tk) +{ + return tk - lex->tokens >= lex->ntok; +} + +int lex(struct lex *l, FILE *f) +{ + l->loc.line = 1; + + for (;;) + { + int c = fgetc(f); + + if (c == EOF) + break; + else if (clex(c, l)) + return -1; + } + + return 0; +} |
