diff options
| author | Carson Fleming <[email protected]> | 2026-03-26 16:21:29 -0400 |
|---|---|---|
| committer | Carson Fleming <[email protected]> | 2026-03-26 16:22:00 -0400 |
| commit | 7d9fb2c733c8c64f6f74eefa0eea35b36be102cd (patch) | |
| tree | 16b6cded5f9611e0ff1948395578845c9688b926 /lexer.c | |
| parent | 68db110d34611fc8bb79035d3a11bba07dea43f3 (diff) | |
| download | ccc-7d9fb2c733c8c64f6f74eefa0eea35b36be102cd.tar.gz | |
let's go we can parse return zero most useful program ever
Diffstat (limited to 'lexer.c')
| -rw-r--r-- | lexer.c | 150 |
1 files changed, 93 insertions, 57 deletions
@@ -7,12 +7,14 @@ static FILE* file = NULL; static int lookahead; +static const char* PATH; static unsigned long LINE, COL; #define LEXER_PANIC(format, ...) {\ fprintf(\ stderr,\ - "ccc: lexer error: line %lu, column %lu: " format "\n",\ + "ccc: lexer error: %s: line %lu, column %lu: " format "\n",\ + PATH,\ LINE,\ COL __VA_OPT__(,)\ __VA_ARGS__);\ @@ -27,6 +29,7 @@ void lexer_load(const char* path) { if (file == NULL) CCC_PANIC; lookahead = fgetc(file); + PATH = path; LINE = 1; COL = 1; } @@ -42,9 +45,15 @@ bool lexer_peek(struct token* p_token) { long orig_offset = ftell(file); int orig_lookahead = lookahead; + unsigned long orig_line = LINE, orig_col = COL; + bool rv = lexer_pop(p_token); + + LINE = orig_line; + COL = orig_col; lookahead = orig_lookahead; fseek(file, orig_offset, SEEK_SET); + return rv; } @@ -79,8 +88,11 @@ static void lex_ident(struct token* p_token, char ic) { buf[len] = 0; *p_token = (struct token) { - .type = IDENTIFIER, - .data.identifier = strndup(buf, sizeof(buf) - 1), + .type = TK_IDENT, + .data.ident = strndup(buf, sizeof(buf) - 1), + .PATH = PATH, + .LINE = LINE, + .COL = COL, }; } @@ -110,8 +122,11 @@ static void lex_float_lit( } *p_token = (struct token) { - .type = FLOAT_LIT, + .type = TK_FLOAT_LIT, .data.float_lit = iv, + .PATH = PATH, + .LINE = LINE, + .COL = COL, }; } @@ -145,8 +160,11 @@ static void lex_int_lit(struct token* p_token, int_lit_t iv) { } *p_token = (struct token) { - .type = INT_LIT, + .type = TK_INT_LIT, .data.int_lit = iv, + .PATH = PATH, + .LINE = LINE, + .COL = COL, }; } @@ -185,8 +203,11 @@ static void lex_char_lit(struct token* p_token) { "expected end of char literal, not \"%c\"", close_quote); *p_token = (struct token) { - .type = CHAR_LIT, + .type = TK_CHAR_LIT, .data.char_lit = c, + .PATH = PATH, + .LINE = LINE, + .COL = COL, }; } @@ -194,8 +215,11 @@ static void lex_str_lit(struct token* p_token) { if (lookahead == '"') { consume_char(); *p_token = (struct token) { - .type = STR_LIT, + .type = TK_STR_LIT, .data.str_lit = strdup(""), + .PATH = PATH, + .LINE = LINE, + .COL = COL, }; return; } @@ -223,75 +247,83 @@ static void lex_str_lit(struct token* p_token) { buf[len] = 0; *p_token = (struct token) { - .type = STR_LIT, + .type = TK_STR_LIT, .data.str_lit = strndup(buf, sizeof(buf) - 1), + .PATH = PATH, + .LINE = LINE, + .COL = COL, }; } static enum token_type two_char_operator_type(char c) { - if (c == '!' && lookahead == '=') return NEQ; - if (c == '^' && lookahead == '=') return XEQ; - if (c == '&' && lookahead == '=') return AND_EQ; - if (c == '&' && lookahead == '&') return LOG_AND; - if (c == '*' && lookahead == '=') return MUL_EQ; - if (c == '-' && lookahead == '=') return NEG_EQ; - if (c == '-' && lookahead == '>') return ARROW; - if (c == '=' && lookahead == '=') return TEST_EQ; - if (c == '+' && lookahead == '=') return PLUS_EQ; - if (c == '|' && lookahead == '|') return LOG_PIPE; - if (c == '|' && lookahead == '=') return PIPE_EQ; - if (c == '/' && lookahead == '=') return DIV_EQ; - if (c == '%' && lookahead == '=') return MOD_EQ; - if (c == '<' && lookahead == '=') return LEQ; - if (c == '>' && lookahead == '=') return GEQ; - if (c == '<' && lookahead == '<') return SHL; - if (c == '>' && lookahead == '>') return SHR; - return NOT_FOUND; + if (c == '!' && lookahead == '=') return TK_NEQ; + if (c == '^' && lookahead == '=') return TK_XEQ; + if (c == '&' && lookahead == '=') return TK_AND_EQ; + if (c == '&' && lookahead == '&') return TK_LOG_AND; + if (c == '*' && lookahead == '=') return TK_MUL_EQ; + if (c == '-' && lookahead == '=') return TK_NEG_EQ; + if (c == '-' && lookahead == '>') return TK_ARROW; + if (c == '=' && lookahead == '=') return TK_TEST_EQ; + if (c == '+' && lookahead == '=') return TK_PLUS_EQ; + if (c == '|' && lookahead == '|') return TK_LOG_PIPE; + if (c == '|' && lookahead == '=') return TK_PIPE_EQ; + if (c == '/' && lookahead == '=') return TK_DIV_EQ; + if (c == '%' && lookahead == '=') return TK_MOD_EQ; + if (c == '<' && lookahead == '=') return TK_LEQ; + if (c == '>' && lookahead == '=') return TK_GEQ; + if (c == '<' && lookahead == '<') return TK_SHL; + if (c == '>' && lookahead == '>') return TK_SHR; + return TK_NOT_FOUND; } static bool lex_complex_operator(struct token* p_token, char c) { enum token_type type = two_char_operator_type(c); - if (type == NOT_FOUND) return false; + if (type == TK_NOT_FOUND) return false; consume_char(); - if (type == SHL && lookahead == '=') { + if (type == TK_SHL && lookahead == '=') { consume_char(); - type = SHL_EQ; + type = TK_SHL_EQ; } - if (type == SHR && lookahead == '=') { + if (type == TK_SHR && lookahead == '=') { consume_char(); - type = SHR_EQ; + type = TK_SHR_EQ; } - *p_token = (struct token) {.type = type}; + *p_token = (struct token) { + .type = type, + .PATH = PATH, + .LINE = LINE, + .COL = COL, + }; return type; } static enum token_type lex_simple_operator(char c) { switch (c) { - case '#': return HASHTAG; - case '(': return LPAREN; - case ')': return RPAREN; - case '{': return LCURLY; - case '}': return RCURLY; - case '[': return LSQUARE; - case ']': return RSQUARE; - case ':': return COLON; - case ';': return SEMI; - case ',': return COMMA; - case '.': return DOT; - case '?': return QMARK; - case '!': return NOT; - case '^': return XOR; - case '&': return AMP; - case '*': return STAR; - case '-': return NEG; - case '=': return ASSIGN; - case '+': return PLUS; - case '\\': return BSLASH; - case '|': return PIPE; - case '/': return DIV; - case '%': return MOD; - case '<': return LT; - case '>': return GT; + case '#': return TK_HASHTAG; + case '(': return TK_LPAREN; + case ')': return TK_RPAREN; + case '{': return TK_LCURLY; + case '}': return TK_RCURLY; + case '[': return TK_LSQUARE; + case ']': return TK_RSQUARE; + case ':': return TK_COLON; + case ';': return TK_SEMI; + case ',': return TK_COMMA; + case '.': return TK_DOT; + case '?': return TK_QMARK; + case '!': return TK_NOT; + case '^': return TK_XOR; + case '&': return TK_AMP; + case '*': return TK_STAR; + case '-': return TK_NEG; + case '=': return TK_ASSIGN; + case '+': return TK_PLUS; + case '\\': return TK_BSLASH; + case '|': return TK_PIPE; + case '/': return TK_DIV; + case '%': return TK_MOD; + case '<': return TK_LT; + case '>': return TK_GT; } LEXER_PANIC("unexpected token %c", c); } @@ -337,3 +369,7 @@ bool lexer_pop(struct token* p_token) { return true; } + +bool lexer_eof() { + return lookahead == EOF; +} |
