summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarson Fleming <[email protected]>2026-03-13 01:05:34 -0400
committerCarson Fleming <[email protected]>2026-03-13 01:05:34 -0400
commit7a361c2e7385c2e670a0e2cc8d9092814ea17253 (patch)
tree4573177a388412a2e7bc1b39df2e8569e7f9316e
downloadccc-7a361c2e7385c2e670a0e2cc8d9092814ea17253.tar.gz
not even compiled once but we ball
-rw-r--r--.clangd4
-rw-r--r--.gitignore6
-rw-r--r--README.md3
-rw-r--r--ccc.h12
-rw-r--r--lexer.c210
-rw-r--r--lexer.h72
-rw-r--r--main.c0
7 files changed, 307 insertions, 0 deletions
diff --git a/.clangd b/.clangd
new file mode 100644
index 0000000..f52a76d
--- /dev/null
+++ b/.clangd
@@ -0,0 +1,4 @@
+CompileFlags:
+ Add:
+ - "-xc"
+ - "-std=c23"
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4c0c7e0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+*.o
+*.out
+build/**
+.*
+!.git*
+!.clangd
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c979324
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# Carson's C Compiler
+
+fuck it, we ball
diff --git a/ccc.h b/ccc.h
new file mode 100644
index 0000000..6b41480
--- /dev/null
+++ b/ccc.h
@@ -0,0 +1,12 @@
+#ifndef CCC_H
+#define CCC_H
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * CCC_PANIC: report the current errno through perror("ccc") and terminate
+ * the process with status 1.
+ *
+ * Wrapped in do { } while (0) so that `CCC_PANIC;` is exactly one statement
+ * and remains valid in an unbraced if/else.
+ */
+#define CCC_PANIC do { perror("ccc"); exit(1); } while (0)
+
+/*
+ * CCC_ERROR(format, ...): print "line <LINE>: <message>" plus a newline to
+ * stderr, then terminate the process with status 1.
+ *
+ * `format` must be a string literal, because it is concatenated with the
+ * "line %ld: " prefix at compile time. The expansion reads an identifier
+ * named LINE, which must be in scope (as a long) wherever the macro is used.
+ */
+#define CCC_ERROR(format, ...) do {\
+    fprintf(stderr, "line %ld: " format "\n", LINE __VA_OPT__(,) __VA_ARGS__);\
+    exit(1);\
+} while (0)
+
+#endif
diff --git a/lexer.c b/lexer.c
new file mode 100644
index 0000000..04aada4
--- /dev/null
+++ b/lexer.c
@@ -0,0 +1,210 @@
+#include "ccc.h"
+#include "lexer.h"
+#include <string.h>
+#include <stdckdint.h>
+
+/* Lexer state shared by every routine in this translation unit. */
+static long LINE;          /* current line number; set to 1 by lexer_load, read by CCC_ERROR */
+static int lookahead;      /* next unread character of the input, as returned by fgetc() */
+static FILE *file = NULL;  /* currently loaded source file; NULL until lexer_load succeeds */
+
+/*
+ * Make `path` the lexer's current input.
+ *
+ * Any previously loaded file is closed first. On success the one-character
+ * lookahead is primed with the first character of the file and the line
+ * counter is reset to 1. If the file cannot be opened the process exits
+ * through CCC_PANIC.
+ */
+void lexer_load(const char *path) {
+    if (file) {
+        fclose(file);
+    }
+
+    file = fopen(path, "r");
+    if (!file) {
+        CCC_PANIC;
+    }
+
+    LINE = 1;
+    lookahead = fgetc(file);
+}
+
+/*
+ * Read the next token into *p_token without consuming it.
+ *
+ * Implemented as lexer_pop() followed by a rewind: the file offset, the
+ * lookahead character and the line counter are all put back to the values
+ * they had on entry. Restoring LINE matters because lexer_pop() bumps it for
+ * every newline it skips; without the restore each peek across a newline
+ * would be counted again by the following pop.
+ *
+ * Returns whatever lexer_pop() returned, or false if no file is loaded.
+ * Exits through CCC_PANIC if the stream position cannot be read or restored.
+ *
+ * NOTE(review): an IDENTIFIER token carries a string allocated by lex_ident,
+ * so peeking and then popping the same identifier produces two separate
+ * allocations.
+ */
+bool lexer_peek(struct token *p_token) {
+    if (file == NULL) {
+        return false;
+    }
+
+    long orig_offset = ftell(file);
+    if (orig_offset < 0) {
+        CCC_PANIC;
+    }
+    int orig_lookahead = lookahead;
+    long orig_line = LINE;
+
+    bool rv = lexer_pop(p_token);
+
+    if (fseek(file, orig_offset, SEEK_SET) != 0) {
+        CCC_PANIC;
+    }
+    lookahead = orig_lookahead;
+    LINE = orig_line;
+    return rv;
+}
+
+/*
+ * ASCII character classification helpers.
+ *
+ * Every use of the argument is parenthesized so that compound expressions
+ * (for example a ?: expression) classify correctly. The argument is still
+ * evaluated more than once, so it must be free of side effects. All of the
+ * helpers yield false for EOF.
+ *
+ * is_whitespace also accepts carriage return, vertical tab and form feed so
+ * that CRLF sources and form-feed separated files are skipped rather than
+ * rejected as unexpected tokens.
+ */
+#define is_whitespace(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' \
+    || (c) == '\r' || (c) == '\v' || (c) == '\f')
+#define is_lower_alpha(c) ('a' <= (c) && (c) <= 'z')
+#define is_upper_alpha(c) ('A' <= (c) && (c) <= 'Z')
+#define is_alpha(c) (is_lower_alpha(c) || is_upper_alpha(c))
+#define is_numeric(c) ('0' <= (c) && (c) <= '9')
+#define is_alphanumeric(c) (is_alpha(c) || is_numeric(c))
+#define is_ident_legal(c) (is_alphanumeric(c) || (c) == '_' || (c) == '$')
+
+/*
+ * Step the file position back by one byte.
+ * NOTE(review): this does not update `lookahead`, so it is not a true inverse
+ * of consume_char(); no code in this file uses it.
+ */
+#define REFUND_CHAR fseek(file, -1, SEEK_CUR)
+
+/*
+ * Hand out the current lookahead character and refill the lookahead with the
+ * next character read from the loaded file. Once fgetc() reports EOF, that
+ * EOF is what subsequent calls return.
+ */
+static int consume_char(void) {
+    const int current = lookahead;
+
+    lookahead = fgetc(file);
+    return current;
+}
+
+/*
+ * Lex an identifier whose first character, `ic`, has already been consumed.
+ *
+ * Keeps consuming while the lookahead is a letter, digit, '_' or '$', then
+ * stores an IDENTIFIER token in *p_token. The token's string is a fresh
+ * heap copy made with strndup(); nothing in this file frees it.
+ *
+ * Exits through CCC_ERROR if the identifier is longer than the internal
+ * buffer allows (sizeof(buf) - 1 characters), and through CCC_PANIC if the
+ * copy cannot be allocated.
+ */
+static void lex_ident(struct token *p_token, char ic) {
+    char buf[1024] = {ic};
+    size_t len = 1;
+
+    while (is_ident_legal(lookahead)) {
+        int c = consume_char();
+        /* one slot is always kept free for the terminating NUL */
+        if (len >= sizeof(buf) - 1) {
+            CCC_ERROR("identifier exceeds maximum size (%zu)", sizeof(buf) - 1);
+        }
+        buf[len++] = (char)c;
+    }
+    buf[len] = '\0';
+
+    char *name = strndup(buf, len);
+    if (name == NULL) {
+        CCC_PANIC;
+    }
+
+    *p_token = (struct token) {
+        .type = IDENTIFIER,
+        .data.identifier = name,
+    };
+}
+
+/*
+ * Placeholder for floating point literals. Nothing is lexed: the call always
+ * terminates the process through CCC_ERROR, and *p_token is never written.
+ */
+static void lex_float_lit(struct token *p_token, unsigned char base, double iv) {
+    /* none of the arguments are consulted until this is implemented */
+    (void)p_token;
+    (void)base;
+    (void)iv;
+
+    CCC_ERROR("lexer: floating point literals are not supported yet");
+}
+
+/*
+ * Lex an integer literal whose first digit has already been consumed; `iv`
+ * is that digit's value.
+ *
+ * A leading 0 selects the radix: "0x"/"0X" is hexadecimal, "0b"/"0B" is
+ * binary, any other leading 0 is octal; everything else is decimal. All
+ * following letters and digits are consumed as digits of the literal, so a
+ * character that is not a valid digit in the chosen radix is an error.
+ * Accumulation uses checked arithmetic and reports overflow of intlit_t.
+ *
+ * If a '.' follows the digits, it is consumed and the literal is handed to
+ * lex_float_lit(); otherwise an INT_LIT token is stored in *p_token.
+ */
+static void lex_int_lit(struct token *p_token, intlit_t iv) {
+    unsigned char radix = 10;
+
+    /* TODO: exponentiation, 2e10 f.e. */
+    if (iv == 0) {
+        switch (lookahead) {
+        case 'x':
+        case 'X':
+            radix = 16;
+            break;
+        case 'b':
+        case 'B':
+            radix = 2;
+            break;
+        default:
+            radix = 8;
+            break;
+        }
+
+        if (radix != 8) {
+            /* swallow the radix letter; at least one digit must follow it */
+            int prefix = consume_char();
+            if (!is_alphanumeric(lookahead)) {
+                CCC_ERROR(
+                    "lexer: invalid suffix on integer constant: %c", prefix);
+            }
+        }
+    }
+
+    while (is_alphanumeric(lookahead)) {
+        const int ch = consume_char();
+        intlit_t digit;
+
+        if (is_upper_alpha(ch)) {
+            digit = ch - 'A' + 10;
+        } else if (is_lower_alpha(ch)) {
+            digit = ch - 'a' + 10;
+        } else {
+            digit = ch - '0';
+        }
+
+        if (digit >= radix) {
+            CCC_ERROR(
+                "lexer: invalid digit in base %hhu: %c",
+                radix,
+                ch);
+        }
+
+        /* iv = iv * radix + digit, stopping at the first step that overflows */
+        if (ckd_mul(&iv, iv, radix) || ckd_add(&iv, iv, digit)) {
+            CCC_ERROR(
+                "lexer: integer literal will overflow");
+        }
+    }
+
+    if (lookahead != '.') {
+        *p_token = (struct token) {
+            .type = INT_LIT,
+            .data.int_lit = iv,
+        };
+        return;
+    }
+
+    consume_char();
+    lex_float_lit(p_token, radix, iv);
+}
+
+/*
+ * Lex a character literal whose opening quote has already been consumed,
+ * and store a CHAR_LIT token in *p_token.
+ *
+ * Accepts a single ordinary character, or one of the escapes \' and \".
+ * Every other escape sequence is rejected as unsupported. An unescaped '
+ * directly after the opening quote (an empty literal) and a raw newline are
+ * rejected too; previously they were taken as the literal's character, so
+ * input such as ''' lexed successfully. All errors exit through CCC_ERROR.
+ */
+static void lex_char_lit(struct token *p_token) {
+    int c = consume_char();
+    if (c == EOF) {
+        CCC_ERROR("lexer: unexpected EOF in char literal");
+    }
+    if (c == '\'') {
+        CCC_ERROR("lexer: empty char literal");
+    }
+    if (c == '\n') {
+        CCC_ERROR("lexer: unexpected newline in char literal");
+    }
+
+    if (c == '\\') {
+        c = consume_char();
+        if (c == EOF) {
+            CCC_ERROR("lexer: unexpected EOF in char literal");
+        }
+
+        /* \' and \" stand for the quote character itself */
+        if (c != '\'' && c != '"') {
+            CCC_ERROR(
+                "lexer: escape sequences other than quotes are not supported yet");
+        }
+    }
+
+    int close_quote = consume_char();
+    if (close_quote == EOF) {
+        CCC_ERROR("lexer: unexpected EOF in char literal");
+    }
+    if (close_quote != '\'') {
+        CCC_ERROR("lexer: expected \"'\", not \"%c\"", close_quote);
+    }
+
+    *p_token = (struct token) {
+        .type = CHAR_LIT,
+        .data.char_lit = (char)c,
+    };
+}
+
+/*
+ * Placeholder for string literals. Lexing them is not implemented, so the
+ * call terminates the process through CCC_ERROR, in the same way as
+ * lex_float_lit. Returning silently would leave *p_token uninitialized while
+ * the caller reports a token as produced.
+ */
+static void lex_str_lit(struct token *p_token) {
+    (void)p_token;
+
+    CCC_ERROR("lexer: string literals are not supported yet");
+}
+
+/*
+ * Map a single punctuation character to its token type. Characters with no
+ * entry in the table terminate the process through CCC_ERROR.
+ */
+enum token_type lex_simple(char c) {
+    static const struct {
+        char symbol;
+        enum token_type type;
+    } simple_tokens[] = {
+        {'*', STAR}, /* TODO: *= */
+        {'#', HASHTAG},
+        {'(', LPAREN},
+        {')', RPAREN},
+        {'{', LCURLY},
+        {'}', RCURLY},
+        {'[', LSQUARE},
+        {']', RSQUARE},
+        {':', COLON},
+        {';', SEMI},
+        {',', COMMA},
+        {'.', DOT},
+        {'?', QMARK},
+    };
+    const size_t count = sizeof(simple_tokens) / sizeof(simple_tokens[0]);
+
+    for (size_t i = 0; i < count; i++) {
+        if (simple_tokens[i].symbol == c) {
+            return simple_tokens[i].type;
+        }
+    }
+
+    CCC_ERROR("lexer: unexpected token %c", c);
+}
+
+/*
+ * Read and consume the next token from the loaded file into *p_token.
+ *
+ * Leading whitespace, // line comments and block comments are skipped
+ * first. LINE is advanced for every newline skipped, including the newlines
+ * inside a block comment, so diagnostics after a multi-line comment report
+ * the right line.
+ *
+ * Returns true when a token was stored, false when no file is loaded or the
+ * input ends before another token starts. Lexical errors, including an
+ * unterminated block comment, exit through CCC_ERROR.
+ */
+bool lexer_pop(struct token *p_token) {
+    /* TODO: e.g. float f = .25; */
+    if (file == NULL) {
+        return false;
+    }
+
+    /* consume all whitespace and comments preceding the next token */
+    int c;
+    for (;;) {
+        c = consume_char();
+        if (c == EOF) {
+            return false;
+        }
+
+        if (c == '/' && lookahead == '/') {
+            /* stop before the newline so the branch below counts it */
+            while (lookahead != EOF && lookahead != '\n') {
+                consume_char();
+            }
+        } else if (c == '/' && lookahead == '*') {
+            consume_char(); /* consume the * */
+            int cc = consume_char();
+            while (cc != EOF && (cc != '*' || lookahead != '/')) {
+                if (cc == '\n') {
+                    LINE++;
+                }
+                cc = consume_char();
+            }
+            if (cc == EOF) {
+                CCC_ERROR("unterminated /* comment");
+            }
+            consume_char(); /* consume the final / */
+        } else if (c == '\n') {
+            LINE++;
+        } else if (!is_whitespace(c)) {
+            break;
+        }
+    }
+
+    /* c is the first character of the token; dispatch on its class */
+    if (is_numeric(c)) {
+        lex_int_lit(p_token, c - '0');
+    } else if (c == '.' && is_numeric(lookahead)) {
+        lex_float_lit(p_token, 10, 0);
+    } else if (is_ident_legal(c)) {
+        lex_ident(p_token, c);
+    } else if (c == '\'') {
+        lex_char_lit(p_token);
+    } else if (c == '"') {
+        lex_str_lit(p_token);
+    } else {
+        *p_token = (struct token) {.type = lex_simple(c)};
+    }
+
+    return true;
+}
diff --git a/lexer.h b/lexer.h
new file mode 100644
index 0000000..62ee9c2
--- /dev/null
+++ b/lexer.h
@@ -0,0 +1,72 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+/*
+ * Kinds of token the lexer can report. Values are implicit: IDENTIFIER is 0
+ * and each following enumerator is one higher, in declaration order.
+ */
+enum token_type {
+    /* identifiers and literals */
+    IDENTIFIER, INT_LIT, CHAR_LIT, STR_LIT,
+
+    /* single-character punctuation */
+    HASHTAG,
+    LPAREN, RPAREN,
+    LCURLY, RCURLY,
+    LSQUARE, RSQUARE,
+    COLON, SEMI, COMMA, DOT, QMARK,
+
+    /*
+     * Operators, one family per line. Apart from STAR, no visible lexer code
+     * produces these yet.
+     */
+    NOT, NEQ,
+    XOR, XEQ,
+    AMP, LOG_AND, AND_EQ,
+    STAR, MUL_EQ,
+    NEG, NEG_EQ, ARROW,
+    ASSIGN, TEST_EQ,
+    PLUS, PLUS_EQ,
+    BSLASH,
+    PIPE, LOG_PIPE, PIPE_EQ,
+    DIV, DIV_EQ, /* presumably '/', which also introduces comments */
+    LT, GT, LEQ, GEQ,
+    SHR, SHR_EQ,
+    SHL, SHL_EQ
+    /* more to come */
+    // ->, everything that can precede = (multi-symbols)
+};
+
+/* Value type of an integer literal; lex_int_lit accumulates into it with overflow checks. */
+typedef unsigned long long intlit_t;
+
+/* One lexed token: its kind plus the payload that kind carries. */
+struct token {
+ enum token_type type;
+ /* Payload; which member is meaningful depends on `type`. */
+ union {
+ /* IDENTIFIER: heap copy made by lex_ident. NOTE(review): no visible code frees it; ownership presumably passes to the caller. */
+ char* identifier;
+ /* INT_LIT: the literal's value. */
+ intlit_t int_lit;
+ /* CHAR_LIT: the literal's character. */
+ char char_lit;
+ /* Presumably for STR_LIT; no visible code sets it yet. */
+ char* str_lit;
+ /* Not referenced by any visible code. */
+ void* unused;
+ } data;
+};
+
+/*
+ * Open `path` as the lexer's input, closing any previously loaded file.
+ * Terminates the process if the file cannot be opened.
+ */
+void lexer_load(const char* path);
+/*
+ * Read the next token into *p_token without consuming it. Returns false if
+ * no file is loaded or no further token is available.
+ */
+bool lexer_peek(struct token* p_token);
+/*
+ * Read and consume the next token into *p_token. Returns false if no file is
+ * loaded or the input is exhausted. Lexical errors terminate the process.
+ */
+bool lexer_pop(struct token* p_token);
+
+#endif
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/main.c