From 414a608c36b2d8f208ad0223219736d7582948ae Mon Sep 17 00:00:00 2001 From: Carson Fleming Date: Fri, 27 Mar 2026 11:27:08 -1000 Subject: fix some stuff --- ccc.h | 2 +- codegen.c | 3 +- lexer.c | 49 +++++++++++++++-------------- lexer.h | 1 - main.c | 17 +++++----- parser.c | 101 +++++++++++++++++++++++++++++++---------------------------- test/weird.c | 2 +- 7 files changed, 92 insertions(+), 83 deletions(-) diff --git a/ccc.h b/ccc.h index 36f80f9..80cbc0a 100644 --- a/ccc.h +++ b/ccc.h @@ -2,6 +2,6 @@ #define CCC_H #define CCC_PANIC { perror("ccc"); exit(1); } - +#define PTR_SIZE 8 #endif diff --git a/codegen.c b/codegen.c index ab3660b..d0905e2 100644 --- a/codegen.c +++ b/codegen.c @@ -81,13 +81,14 @@ static void emit_expr( break; case EXPR_VAR_REF: emit_var_ref(outfile, &node->as._var_ref, storage); + break; } } static void emit_stmt(FILE* outfile, const struct stmt_node* node); static unsigned long long get_type_size(const struct type_node* type) { - if (type->ptr_level > 0) return 8; + if (type->ptr_level > 0) return PTR_SIZE; struct type_def type_def; if (!scope_get_type(scope, &type_def, type->name)) diff --git a/lexer.c b/lexer.c index 9d0e596..f5072a1 100644 --- a/lexer.c +++ b/lexer.c @@ -10,6 +10,8 @@ static int lookahead; static const char* PATH; static unsigned long LINE, COL; +static struct token tok = {.type = TK_NOT_FOUND}; + #define LEXER_PANIC(format, ...) {\ fprintf(\ stderr,\ @@ -21,6 +23,8 @@ static unsigned long LINE, COL; exit(1);\ } +static void lexer_advance(); + void lexer_load(const char* path) { if (file != NULL) { fclose(file); @@ -32,6 +36,7 @@ void lexer_load(const char* path) { PATH = path; LINE = 1; COL = 1; + lexer_advance(); } void lexer_close() { @@ -41,20 +46,9 @@ void lexer_close() { } bool lexer_peek(struct token* p_token) { - if (file == NULL) return false; - - long orig_offset = ftell(file); - int orig_lookahead = lookahead; - unsigned long orig_line = LINE, orig_col = COL; - - bool rv = lexer_pop(p_token); - - LINE = orig_line; - COL = orig_col; - lookahead = orig_lookahead; - fseek(file, orig_offset, SEEK_SET); - - return rv; + if (tok.type == TK_NOT_FOUND) return false; + if (p_token != NULL) *p_token = tok; + return true; } #define is_whitespace(c) (c == ' ' || c == '\t' || c == '\n') @@ -328,7 +322,7 @@ static enum token_type lex_simple_operator(char c) { LEXER_PANIC("unexpected token %c", c); } -bool lexer_pop(struct token* p_token) { +static bool lexer_read() { if (file == NULL) return false; // consume all whitespace and comments preceding the next token @@ -355,21 +349,28 @@ bool lexer_pop(struct token* p_token) { } if (is_numeric(c)) - lex_int_lit(p_token, c - '0'); + lex_int_lit(&tok, c - '0'); else if (c == '.' && is_numeric(lookahead)) - lex_float_lit(p_token, 10, 0); + lex_float_lit(&tok, 10, 0); else if (is_ident_legal(c)) - lex_ident(p_token, c); + lex_ident(&tok, c); else if (c == '\'') - lex_char_lit(p_token); + lex_char_lit(&tok); else if (c == '"') - lex_str_lit(p_token); - else if (!lex_complex_operator(p_token, c)) - p_token->type = lex_simple_operator(c); + lex_str_lit(&tok); + else if (!lex_complex_operator(&tok, c)) + tok.type = lex_simple_operator(c); return true; } -bool lexer_eof() { - return lookahead == EOF; +static void lexer_advance() { + if (!lexer_read()) tok.type = TK_NOT_FOUND; +} + +bool lexer_pop(struct token* p_token) { + if (tok.type == TK_NOT_FOUND) return false; + if (p_token != NULL) *p_token = tok; + lexer_advance(); + return true; } diff --git a/lexer.h b/lexer.h index acb8eb5..aefca82 100644 --- a/lexer.h +++ b/lexer.h @@ -77,6 +77,5 @@ void lexer_load(const char* path); void lexer_close(); bool lexer_peek(struct token* p_token); bool lexer_pop(struct token* p_token); -bool lexer_eof(); #endif diff --git a/main.c b/main.c index e2aca11..5c26dd3 100644 --- a/main.c +++ b/main.c @@ -3,6 +3,7 @@ #include "codegen.h" #include #include +#include void test_lexer(int argc, char** argv) { struct token token; @@ -37,15 +38,17 @@ void test_lexer(int argc, char** argv) { } void test_parser(int argc, char** argv) { - struct root_node* root; - struct root_node** p_cur = &root; for (int i = 1; i < argc; i++) { - *p_cur = parse(argv[i]); - p_cur = &((*p_cur)->next); - } + struct root_node* root = parse(argv[i]); + unsigned int fn_sz = strlen(argv[i]); + char outfile[fn_sz + 1]; + strcpy(outfile, argv[i]); + outfile[fn_sz - 1] = 's'; + outfile[fn_sz] = 0; - emit_code(root, "test/simple.s"); - ast_destroy(root); + emit_code(root, outfile); + ast_destroy(root); + } } int main(int argc, char** argv) { diff --git a/parser.c b/parser.c index 699f345..d4af5a0 100644 --- a/parser.c +++ b/parser.c @@ -33,6 +33,12 @@ static void unexpected_token(enum token_type expected) { PARSER_PANIC("unexpected token"); } +/* TODO: reorganize the lexer to make peek cheaper */ +static void peek_or_panic() { + if (!lexer_peek(&tok)) + PARSER_PANIC("unexpected EOF"); +} + static void expect(enum token_type expected) { if (!lexer_pop(&tok)) PARSER_PANIC("unexpected EOF"); @@ -40,46 +46,38 @@ static void expect(enum token_type expected) { if (tok.type != expected) unexpected_token(expected); } -static void peek_or_panic() { - if (!lexer_peek(&tok)) - PARSER_PANIC("unexpected EOF"); -} +static void expect_kw(const char* kw) { + if (!lexer_pop(&tok)) + PARSER_PANIC("unexpected EOF, expected %s", kw); -/* "handle" indicates that we've peeked already */ -static void handle_expr(struct expr_node* p_node); -static void handle_stmt(struct stmt_node* p_node); + if (tok.type != TK_IDENT) + PARSER_PANIC("unexpected token, expected %s", kw); + + if (strcmp(kw, tok.data.ident) != 0) + PARSER_PANIC( + "unexpected identifier %s, expected %s", tok.data.ident, kw); + + /* string won't go in the AST, discard it */ + free(tok.data.ident); + tok.data.ident = NULL; +} -static void handle_type(struct type_node* p_node) { +static void parse_type(struct type_node* p_node) { /* TODO: need some concept of known types in scope */ /* TODO: modifiers, void rules, arrays, etc. */ /* TODO: struct, union, enum */ + expect(TK_IDENT); p_node->name = tok.data.ident; peek_or_panic(); p_node->ptr_level = 0; while (tok.type == TK_STAR) { - p_node->ptr_level++; expect(TK_STAR); + p_node->ptr_level++; + peek_or_panic(); } } -static void parse_return(struct return_node* p_node) { - expect(TK_IDENT); - if (strcmp(tok.data.ident, "return") != 0) - PARSER_PANIC("unexpected token %s; expected: return", tok.data.ident); - - if (!lexer_peek(&tok)) - PARSER_PANIC("unexpected EOF in return statement"); - - if (tok.type == TK_SEMI) { - p_node->ret_val = NULL; - return; - } - - p_node->ret_val = protected_alloc(sizeof(struct expr_node)); - handle_expr(p_node->ret_val); -} - static void parse_int_lit(struct int_lit_node* p_node) { expect(TK_INT_LIT); p_node->val = tok.data.int_lit; @@ -90,7 +88,8 @@ static void parse_var_ref(struct var_ref_node* p_node) { p_node->ident = tok.data.ident; } -static void handle_expr(struct expr_node* p_node) { +static void parse_expr(struct expr_node* p_node) { + peek_or_panic(); switch (tok.type) { case TK_SEMI: p_node->type = EXPR_EMPTY; @@ -108,31 +107,45 @@ static void handle_expr(struct expr_node* p_node) { } } -static void handle_var_decl(struct var_decl_node* p_node) { - handle_type(&p_node->type); +static void parse_var_decl(struct var_decl_node* p_node) { + parse_type(&p_node->type); expect(TK_IDENT); p_node->ident = tok.data.ident; } +static void parse_stmt(struct stmt_node* p_node); + +static void parse_return(struct return_node* p_node) { + expect_kw("return"); + + peek_or_panic(); + if (tok.type == TK_SEMI) { + p_node->ret_val = NULL; + return; + } + + p_node->ret_val = protected_alloc(sizeof(struct expr_node)); + parse_expr(p_node->ret_val); +} + static void parse_group(struct group_node* p_node) { expect(TK_LCURLY); struct stmt_node** pp_node = &p_node->body_head; for (;;) { - if (!lexer_peek(&tok)) - PARSER_PANIC("unexpected EOF in statement group"); - + peek_or_panic(); if (tok.type == TK_RCURLY) break; *pp_node = protected_alloc(sizeof(struct stmt_node)); - handle_stmt(*pp_node); + parse_stmt(*pp_node); pp_node = &((*pp_node)->next); } expect(TK_RCURLY); } -static void handle_stmt(struct stmt_node* p_node) { +static void parse_stmt(struct stmt_node* p_node) { + peek_or_panic(); switch (tok.type) { case TK_LCURLY: p_node->type = STMT_GROUP; @@ -145,45 +158,37 @@ static void handle_stmt(struct stmt_node* p_node) { break; } else if (scope_get_type(scope, NULL, tok.data.ident)) { p_node->type = STMT_VAR_DECL; - handle_var_decl(&p_node->as._var_decl); + parse_var_decl(&p_node->as._var_decl); break; } default: p_node->type = STMT_EXPR; - handle_expr(&p_node->as._expr); + parse_expr(&p_node->as._expr); } expect(TK_SEMI); } static void parse_arg_list(struct var_decl_node** pp_arg) { for (;;) { - expect(TK_IDENT); - *pp_arg = protected_alloc(sizeof(struct var_decl_node)); - handle_var_decl(*pp_arg); + parse_var_decl(*pp_arg); pp_arg = &((*pp_arg)->next); - if (!lexer_peek(&tok)) - PARSER_PANIC("unexpected EOF in argument list"); - + peek_or_panic(); if (tok.type == TK_RPAREN) break; expect(TK_COMMA); } } static void parse_fn_decl(struct fn_decl_node* p_node) { - expect(TK_IDENT); - handle_type(&p_node->return_type); + parse_type(&p_node->return_type); expect(TK_IDENT); p_node->name = tok.data.ident; expect(TK_LPAREN); - if (!lexer_peek(&tok)) - PARSER_PANIC("unexpected EOF in function declaration"); - - + peek_or_panic(); if (tok.type != TK_RPAREN) parse_arg_list(&p_node->args_head); expect(TK_RPAREN); diff --git a/test/weird.c b/test/weird.c index 74a74be..bc49f3f 100644 --- a/test/weird.c +++ b/test/weird.c @@ -1,3 +1,3 @@ -int main(int argc) { +int main(int argc, char** argv) { return argc; } -- cgit v1.2.3