#include "lex.h"
#include "errloc.h"

#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Release the text buffer owned by a token. The token structure itself is
 * not freed; its pointer is cleared so a repeated call is harmless.
 */
static void free_tk(struct tk *tk)
{
	free(tk->s);
	tk->s = NULL;
}

/*
 * Report a malformed numeric token.
 *
 * Must be called right after the failing strtoll()/strtoull() so that errno
 * and *end still describe that conversion. Always returns -1 so callers can
 * write `return invnum(...)`.
 */
static int invnum(const struct tk *tk, char *end)
{
	if (errno)
		errloc(tk, "invalid number: %s (%s)", tk->s, strerror(errno));
	else if (*end)
		errloc(tk, "invalid number: %s", tk->s);
	return -1;
}

/*
 * Finalize the token currently being accumulated in lex->tk.
 *
 * NUL-terminates the token text, validates NUM tokens, appends the token to
 * lex->tokens and resets lex->tk / lex->len for the next token.
 *
 * Returns 0 on success (including when no token is pending) and -1 on
 * allocation failure or an invalid number. On failure the pending token
 * stays in lex->tk, so lex_free() still releases its buffer.
 */
static int fintok(struct lex *lex)
{
	struct tk *tk = &lex->tk, *tokens;
	/* Every field except loc is zero; presumably type 0 is UNDEF. */
	const struct tk empty = {.loc = tk->loc};
	size_t ntk = lex->ntok + 1;
	char *s;

	if (tk->type == UNDEF)
		return 0;

	/* Room for the terminator; also allocates for an empty literal. */
	if (!(s = realloc(tk->s, lex->len + 1))) {
		perror("realloc(3)");
		return -1;
	}
	tk->s = s;
	tk->s[lex->len] = '\0';

	switch (tk->type) {
	case UNDEF:
		break;
	case ANY:
		errloc(tk, "%s: unreachable", __func__);
		return -1;
	case LIT:
	case ID:
		break;
	case NUM: {
		int neg = *tk->s == '-';
		char *end;

		/*
		 * Accept anything that fits in long long; a non-negative value
		 * that does not is given a second chance as unsigned long long.
		 * Only validity matters here, the converted value is discarded.
		 */
		errno = 0;
		strtoll(tk->s, &end, 0);
		if (errno || *end) {
			if (neg)
				return invnum(tk, end);
			errno = 0;
			strtoull(tk->s, &end, 0);
			if (errno || *end)
				return invnum(tk, end);
		}
	}
	}

	if (!(tokens = realloc(lex->tokens, ntk * sizeof *tokens))) {
		perror("realloc(3)");
		return -1;
	}
	/* Ownership of tk->s moves into the token array. */
	tokens[lex->ntok++] = lex->tk;
	lex->tokens = tokens;
	lex->tk = empty;
	lex->len = 0;
	return 0;
}

/* True for visible characters in the range '!' through '~'. */
static int printable(char c)
{
	return c >= '!' && c <= '~';
}

/* True for the decimal digits '0' through '9'. */
static int digit_ch(char c)
{
	return c >= '0' && c <= '9';
}

/* True for characters that may start an identifier: '_', a-z, A-Z. */
static int idstart_ch(char c)
{
	return c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/*
 * Report an unexpected input character at the lexer's current location.
 * Characters outside the visible range are shown as a hex byte value.
 */
static void invch(const struct lex *l, char c)
{
	if (printable(c))
		errcloc(l, "invalid character: %c", c);
	else
		errcloc(l, "invalid character: (%#hhx)", c);
}

/*
 * Feed one character into the token under construction.
 *
 * With no token pending the character selects the token type: a double
 * quote opens a literal, a sign or digit opens a number, '_' or a letter
 * opens an identifier. Otherwise the character is checked against the
 * pending token's type and appended to its text. The quotes delimiting a
 * literal are not stored.
 *
 * Returns 0 on success, -1 on an invalid character or allocation failure.
 */
static int ch(char c, struct lex *lex)
{
	struct tk *tk = &lex->tk;
	char *s;

	switch (tk->type) {
	case UNDEF:
		tk->loc = lex->loc;
		if (c == '\"') {
			tk->type = LIT;
			return 0;
		} else if (c == '-' || c == '+' || digit_ch(c)) {
			tk->type = NUM;
		} else if (idstart_ch(c)) {
			tk->type = ID;
		} else {
			invch(lex, c);
			return -1;
		}
		break;
	case LIT:
		if (c == '\"')
			return fintok(lex);
		break;
	case NUM:
		/* Everything else is validated by strtoll() in fintok(). */
		if (c == '\"') {
			invch(lex, c);
			return -1;
		}
		break;
	case ID:
		/* Also rejects '"', '-' and '+', none of which are id chars. */
		if (!idstart_ch(c) && !digit_ch(c)) {
			invch(lex, c);
			return -1;
		}
		break;
	case ANY:
		errcloc(lex, "%s: unreachable", __func__);
		return -1;
	}

	if (!(s = realloc(tk->s, lex->len + 1))) {
		perror("realloc(3)");
		return -1;
	}
	s[lex->len++] = c;
	tk->s = s;
	return 0;
}

/*
 * Process one raw input character: track the column, handle comments,
 * line ends and token separators, and pass everything else to ch().
 *
 * '*' starts a comment that lasts until the next newline; characters inside
 * a comment are ignored. Space and tab end the pending token unless it is a
 * literal, in which case they are part of its text.
 *
 * Returns 0 on success, -1 on error.
 */
static int clex(char c, struct lex *lex)
{
	struct tk *tk = &lex->tk;
	struct loc *loc = &lex->loc;

	++loc->col;
	switch (c) {
	case '*':
		/*
		 * NOTE(review): this also fires inside a quoted literal, which
		 * finalizes the literal early and drops the rest of the line.
		 * Confirm whether that is intended.
		 */
		lex->comment = 1;
		return fintok(lex);
	case '\n':
		if (tk->type == LIT) {
			errcloc(lex, "unterminated literal");
			return -1;
		}
		if (fintok(lex))
			return -1;
		loc->line++;
		loc->col = lex->comment = 0;
		return 0;
	case ' ':
	case '\t':
		if (tk->type != LIT)
			return lex->comment ? 0 : fintok(lex);
		/* fallthrough: whitespace inside a literal is ordinary text */
	default:
		return lex->comment ? 0 : ch(c, lex);
	}
}

/*
 * Release every token buffer, the token array and the pending token.
 * The struct lex itself is not freed. Pointers and counts are reset so that
 * calling this twice does not double-free.
 */
void lex_free(struct lex *lex)
{
	for (size_t i = 0; i < lex->ntok; i++)
		free_tk(&lex->tokens[i]);
	free(lex->tokens);
	lex->tokens = NULL;
	lex->ntok = 0;
	free_tk(&lex->tk);
	lex->len = 0;
}

/*
 * Return nonzero when tk points at or past the end of lex->tokens, i.e. its
 * index is >= lex->ntok. tk must point into (or one past) that array.
 */
int lex_eof(const struct lex *lex, const struct tk *tk)
{
	return tk - lex->tokens >= lex->ntok;
}

/*
 * Tokenize the whole stream f into l->tokens.
 *
 * Line numbering starts at 1. At end of input the pending token is
 * finalized, so the last token does not need a trailing newline; a literal
 * still open at that point is reported as unterminated.
 *
 * Returns 0 on success, -1 on a lexical error, allocation failure or read
 * error. Tokens collected so far remain owned by l; release them with
 * lex_free().
 */
int lex(struct lex *l, FILE *f)
{
	int c;

	l->loc.line = 1;
	while ((c = fgetc(f)) != EOF) {
		if (clex((char)c, l))
			return -1;
	}

	/* fgetc() returns EOF for read errors as well as end of file. */
	if (ferror(f)) {
		perror("fgetc(3)");
		return -1;
	}
	if (l->tk.type == LIT) {
		errcloc(l, "unterminated literal");
		return -1;
	}
	return fintok(l);
}