From a6c17eff5f2608f020de24d406a6aea3c620bc2f Mon Sep 17 00:00:00 2001
From: Chewing_Bever
Date: Mon, 4 Mar 2024 12:18:48 +0100
Subject: [PATCH] feat: started project and lexer

---
 .editorconfig                     |   5 ++
 .gitignore                        |   5 ++
 Makefile                          | 134 ++++++++++++++++++++++++++++++
 config.mk                         |  17 ++++
 include/mrk/common.h              |  19 +++++
 include/mrk/lexer.h               |  51 ++++++++++++
 src/_include/mrk/lexer_internal.h |  40 +++++++++
 src/lexer/lexer.c                 |  73 ++++++++++++++++
 8 files changed, 344 insertions(+)
 create mode 100644 .editorconfig
 create mode 100644 .gitignore
 create mode 100644 Makefile
 create mode 100644 config.mk
 create mode 100644 include/mrk/common.h
 create mode 100644 include/mrk/lexer.h
 create mode 100644 src/_include/mrk/lexer_internal.h
 create mode 100644 src/lexer/lexer.c

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..09faabf
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,5 @@
+root = true
+
+[*.{c,cpp,h}]
+indent_style = space
+indent_size = 2
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..dd72998
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+build/
+.cache/
+compile_commands.json
+.cache/
+vgcore.*
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e52bb1f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,134 @@
+# https://spin.atomicobject.com/2016/08/26/makefile-c-projects/ was a great
+# base for this Makefile
+
+-include config.mk
+
+LIB := $(BUILD_DIR)/$(LIB_FILENAME)
+
+SRCS != find '$(SRC_DIR)' -iname '*.c'
+SRCS_H != find include -iname '*.h'
+SRCS_H_INTERNAL != find '$(SRC_DIR)' -iname '*.h'
+SRCS_TEST != find '$(TEST_DIR)' -iname '*.c' 2>/dev/null
+SRCS_EXAMPLE != find '$(EXAMPLE_DIR)' -iname '*.c' 2>/dev/null
+
+OBJS := $(SRCS:%=$(BUILD_DIR)/%.o)
+OBJS_TEST := $(SRCS_TEST:%=$(BUILD_DIR)/%.o)
+OBJS_EXAMPLE := $(SRCS_EXAMPLE:%=$(BUILD_DIR)/%.o)
+
+DEPS := $(SRCS:%=$(BUILD_DIR)/%.d) $(SRCS_TEST:%=$(BUILD_DIR)/%.d) $(SRCS_EXAMPLE:%=$(BUILD_DIR)/%.d)
+
+BINS_TEST := $(OBJS_TEST:%.c.o=%)
+BINS_EXAMPLE := $(OBJS_EXAMPLE:%.c.o=%)
+
+TARGETS_TEST := $(BINS_TEST:%=test-%)
+TARGETS_MEM_TEST := $(BINS_TEST:%=test-mem-%)
+TARGETS_EXAMPLE := $(BINS_EXAMPLE:%=example-%)
+
+_CFLAGS := $(addprefix -I,$(INC_DIRS)) $(CFLAGS) -Wall -Wextra
+
+.PHONY: all
+all: lib
+
+
+# =====COMPILATION=====
+# Utility used by the CI to lint
+.PHONY: objs
+objs: $(OBJS)
+
+.PHONY: lib
+lib: $(LIB)
+$(LIB): $(OBJS)
+	ar -rcs $@ $(OBJS)
+
+$(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c
+	mkdir -p $(dir $@)
+	$(CC) -c $(_CFLAGS) $< -o $@
+
+# =====TESTING=====
+.PHONY: test
+test: $(TARGETS_TEST)
+
+.PHONY: test-mem
+test-mem: $(TARGETS_MEM_TEST)
+
+.PHONY: $(TARGETS_TEST)
+$(TARGETS_TEST): test-%: %
+	./$^
+
+.PHONY: $(TARGETS_MEM_TEST)
+$(TARGETS_MEM_TEST): test-mem-%: %
+	valgrind --tool=memcheck --error-exitcode=1 --track-origins=yes --leak-check=full ./$^
+
+.PHONY: build-test
+build-test: $(BINS_TEST)
+
+$(BINS_TEST): %: %.c.o $(LIB)
+	$(CC) \
+		$^ -o $@
+
+# Along with the include directory, each test includes $(TEST_DIR) (which
+# contains the acutest.h header file), and the src directory of the module it's
+# testing. This allows tests to access internal methods, which aren't publicly
+# exposed.
+$(BUILD_DIR)/$(TEST_DIR)/%.c.o: $(TEST_DIR)/%.c
+	mkdir -p $(dir $@)
+	$(CC) $(_CFLAGS) -I$(TEST_DIR) \
+		-I$(SRC_DIR)/_include \
+		-c $< -o $@
+
+# =====EXAMPLES=====
+.PHONY: build-example
+build-example: $(BINS_EXAMPLE)
+
+$(BINS_EXAMPLE): %: %.c.o $(LIB)
+	$(CC) \
+		$^ -o $@
+
+# Example binaries link the resulting library
+$(BUILD_DIR)/$(EXAMPLE_DIR)/%.c.o: $(EXAMPLE_DIR)/%.c
+	mkdir -p $(dir $@)
+	$(CC) $(_CFLAGS) -I$(PUB_INC_DIR) -c $< -o $@
+
+# =====MAINTENANCE=====
+.PHONY: lint
+lint:
+	clang-format -n --Werror \
+		$(filter-out $(THIRDPARTY),$(SRCS)) \
+		$(filter-out $(THIRDPARTY),$(SRCS_H)) \
+		$(filter-out $(THIRDPARTY),$(SRCS_H_INTERNAL))
+
+.PHONY: fmt
+fmt:
+	clang-format -i \
+		$(filter-out $(THIRDPARTY),$(SRCS)) \
+		$(filter-out $(THIRDPARTY),$(SRCS_H)) \
+		$(filter-out $(THIRDPARTY),$(SRCS_H_INTERNAL))
+
+.PHONY: check
+check:
+	mkdir -p $(BUILD_DIR)/cppcheck
+	cppcheck \
+		$(addprefix -I,$(INC_DIRS)) \
+		--cppcheck-build-dir=$(BUILD_DIR)/cppcheck \
+		--error-exitcode=1 \
+		--enable=warning,style \
+		--inline-suppr \
+		--check-level=exhaustive \
+		--quiet \
+		-j$(shell nproc) \
+		$(filter-out $(THIRDPARTY),$(SRCS))
+
+.PHONY: clean
+clean:
+	rm -rf '$(BUILD_DIR)'
+
+
+.PHONY: bear
+bear: clean
+	bear -- $(MAKE)
+	bear --append -- $(MAKE) build-test
+	bear --append -- $(MAKE) build-example
+
+
+# Make make aware of the .d files
+-include $(DEPS)
diff --git a/config.mk b/config.mk
new file mode 100644
index 0000000..0c88a69
--- /dev/null
+++ b/config.mk
@@ -0,0 +1,17 @@
+LIB_FILENAME = libmrk.a
+
+BUILD_DIR = build
+SRC_DIR = src
+TEST_DIR = test
+EXAMPLE_DIR = example
+THIRDPARTY =
+
+PUB_INC_DIR = include
+INC_DIRS = $(PUB_INC_DIR) $(SRC_DIR)/_include
+
+# -MMD: generate a .d file for every source file. This file can be imported by
+# make and makes make aware that a header file has been changed, ensuring an
+# object file is also recompiled if only a header is changed.
+# -MP: generate a dummy target for every header file (according to the docs it
+# prevents some errors when removing header files)
+CFLAGS ?= -MMD -MP -g
diff --git a/include/mrk/common.h b/include/mrk/common.h
new file mode 100644
index 0000000..54163c5
--- /dev/null
+++ b/include/mrk/common.h
@@ -0,0 +1,30 @@
+#ifndef MRK_COMMON
+#define MRK_COMMON
+
+#include <stdlib.h>
+
+/**
+ * Allocate a zero-initialized array of `n` elements of `size` bytes each and
+ * store the resulting pointer in `*out`.
+ *
+ * If the allocation fails, the *enclosing function* returns
+ * `mrk_err_failed_alloc`, so this macro can only be used inside functions that
+ * return `mrk_err`.
+ */
+#define MRK_CALLOC(out, n, size)                                               \
+  do {                                                                         \
+    void *mrk_calloc_tmp = calloc((n), (size));                                \
+    if (mrk_calloc_tmp == NULL)                                                \
+      return mrk_err_failed_alloc;                                             \
+    *(out) = mrk_calloc_tmp;                                                   \
+  } while (0)
+
+/**
+ * Error codes shared across the library.
+ */
+typedef enum mrk_err {
+  mrk_err_ok = 0,
+  mrk_err_failed_alloc,
+} mrk_err;
+
+#endif
diff --git a/include/mrk/lexer.h b/include/mrk/lexer.h
new file mode 100644
index 0000000..88af930
--- /dev/null
+++ b/include/mrk/lexer.h
@@ -0,0 +1,76 @@
+#ifndef MRK_LEXER
+#define MRK_LEXER
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "mrk/common.h"
+
+/**
+ * Opaque lexer state; allocate one with `mrk_lexer_init`.
+ */
+typedef struct mrk_lexer mrk_lexer;
+
+/**
+ * Status codes returned by `mrk_lexer_next`.
+ */
+typedef enum mrk_lexer_err {
+  mrk_lexer_err_ok = 0,
+  mrk_lexer_err_done,
+  mrk_lexer_err_unexpected_char,
+} mrk_lexer_err;
+
+/**
+ * Kinds of token the lexer can emit.
+ */
+typedef enum mrk_token_type {
+  mrk_token_type_pound = 0,
+} mrk_token_type;
+
+/**
+ * A lexed token, described by the half-open range `[start, end)` of buffer
+ * indices it spans.
+ */
+typedef struct mrk_token {
+  mrk_token_type type;
+  size_t start;
+  size_t end;
+} mrk_token;
+
+/**
+ * Initialize a new lexer struct.
+ *
+ * On success, `*out` points to a zero-initialized lexer allocated with
+ * `calloc`. Returns `mrk_err_failed_alloc` if the allocation fails, in which
+ * case `*out` is left untouched.
+ */
+mrk_err mrk_lexer_init(mrk_lexer **out);
+
+/**
+ * Open the buffer with the given lexer struct. `buf` is expected to live for
+ * the duration of the lexing.
+ *
+ * The lexer will run either until `len` characters have been matched, or until
+ * a nul character has been reached. If `len` is set to 0, only the nul check is
+ * used to determine the end of the buffer.
+ */
+void mrk_lexer_open(mrk_lexer *lexer, const char *buf, size_t len);
+
+/**
+ * Returns whether the lexer is done. A lexer without a buffer is considered
+ * done.
+ */
+bool mrk_lexer_at_end(const mrk_lexer *lexer);
+
+/**
+ * Output the next lexed token for the given input.
+ *
+ * Returns `mrk_lexer_err_ok` after writing a token to `out`,
+ * `mrk_lexer_err_done` once the end of the input has been reached, and
+ * `mrk_lexer_err_unexpected_char` if the next character does not start any
+ * known token. In the latter case the character is skipped and `out` is left
+ * untouched, so lexing can be resumed with another call.
+ */
+mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer);
+
+#endif
diff --git a/src/_include/mrk/lexer_internal.h b/src/_include/mrk/lexer_internal.h
new file mode 100644
index 0000000..9811761
--- /dev/null
+++ b/src/_include/mrk/lexer_internal.h
@@ -0,0 +1,48 @@
+#ifndef MRK_LEXER_INTERNAL
+#define MRK_LEXER_INTERNAL
+
+#include "mrk/lexer.h"
+
+/**
+ * Internal state of a lexer.
+ */
+struct mrk_lexer {
+  // Input buffer handed to `mrk_lexer_open`; borrowed, never freed here
+  struct {
+    const char *s;
+    size_t len;
+  } buf;
+  // Position of the next character to be consumed
+  struct {
+    size_t line;
+    size_t line_index;
+    size_t buf_index;
+  } pos;
+  // Half-open range `[start, end)` of buffer indices matched so far for the
+  // token currently being lexed
+  struct {
+    size_t start;
+    size_t end;
+  } token;
+};
+
+/**
+ * Return the next character that would be consumed by a call to advance. At the
+ * end of the buffer, this value is nul.
+ */
+char mrk_lexer_peek(mrk_lexer *lexer);
+
+/**
+ * Advance the current position by one character, adding the new character to
+ * the current token's context and returning it. At the end of the buffer,
+ * nothing is consumed and nul is returned.
+ */
+char mrk_lexer_advance(mrk_lexer *lexer);
+
+/**
+ * Output the currently matched token to the token struct with the given type,
+ * and reset the lexer's tracked token.
+ */
+void mrk_lexer_emit(mrk_token *out, mrk_lexer *lexer, mrk_token_type type);
+
+#endif
diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
new file mode 100644
index 0000000..ccacbb3
--- /dev/null
+++ b/src/lexer/lexer.c
@@ -0,0 +1,87 @@
+#include "mrk/lexer_internal.h"
+
+mrk_err mrk_lexer_init(mrk_lexer **out) {
+  MRK_CALLOC(out, 1, sizeof(mrk_lexer));
+
+  return mrk_err_ok;
+}
+
+void mrk_lexer_open(mrk_lexer *lexer, const char *buf, size_t len) {
+  lexer->buf.s = buf;
+  lexer->buf.len = len;
+  lexer->pos.line = 0;
+  // Reset the column as well, so a lexer can be reused for a second buffer
+  lexer->pos.line_index = 0;
+  lexer->pos.buf_index = 0;
+  lexer->token.start = 0;
+  lexer->token.end = 0;
+}
+
+bool mrk_lexer_at_end(const mrk_lexer *lexer) {
+  // A lexer without a buffer has nothing to lex; this also prevents
+  // dereferencing a NULL buffer before mrk_lexer_open has been called
+  if (lexer->buf.s == NULL) {
+    return true;
+  }
+
+  // With len == 0 the buffer is only bounded by its nul terminator. The length
+  // check comes first so the buffer is never read past `len` characters.
+  return (lexer->buf.len > 0 && lexer->pos.buf_index >= lexer->buf.len) ||
+         (lexer->buf.s[lexer->pos.buf_index] == '\0');
+}
+
+char mrk_lexer_advance(mrk_lexer *lexer) {
+  if (mrk_lexer_at_end(lexer)) {
+    return '\0';
+  }
+
+  char c = lexer->buf.s[lexer->pos.buf_index];
+
+  // A newline is still part of the line it terminates, so the position only
+  // moves to the next line once the newline itself has been consumed
+  if (c == '\n') {
+    lexer->pos.line++;
+    lexer->pos.line_index = 0;
+  } else {
+    lexer->pos.line_index++;
+  }
+
+  lexer->pos.buf_index++;
+  lexer->token.end++;
+
+  return c;
+}
+
+char mrk_lexer_peek(mrk_lexer *lexer) {
+  if (mrk_lexer_at_end(lexer)) {
+    return '\0';
+  }
+
+  return lexer->buf.s[lexer->pos.buf_index];
+}
+
+void mrk_lexer_emit(mrk_token *out, mrk_lexer *lexer, mrk_token_type type) {
+  out->type = type;
+  out->start = lexer->token.start;
+  out->end = lexer->token.end;
+
+  // The next token starts where this one ended
+  lexer->token.start = lexer->token.end;
+}
+
+mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) {
+  if (mrk_lexer_at_end(lexer)) {
+    return mrk_lexer_err_done;
+  }
+
+  switch (mrk_lexer_advance(lexer)) {
+  case '#':
+    mrk_lexer_emit(out, lexer, mrk_token_type_pound);
+    return mrk_lexer_err_ok;
+  default:
+    // No token starts with this character. Drop it from the tracked token so
+    // the caller can keep lexing after reporting the error.
+    lexer->token.start = lexer->token.end;
+    return mrk_lexer_err_unexpected_char;
+  }
+}