feat: started project and lexer

main
Jef Roosens 2024-03-04 12:18:48 +01:00
commit a6c17eff5f
Signed by: Jef Roosens
GPG Key ID: B75D4F293C7052DB
8 changed files with 344 additions and 0 deletions

5
.editorconfig 100644
View File

@ -0,0 +1,5 @@
# Top-most EditorConfig file for this project
root = true
# C/C++ sources and headers are indented with two spaces
[*.{c,cpp,h}]
indent_style = space
indent_size = 2

5
.gitignore vendored 100644
View File

@ -0,0 +1,5 @@
# Build output
build/
# Tooling caches and the generated compilation database
.cache/
compile_commands.json
# Valgrind core dumps
vgcore.*

134
Makefile 100644
View File

@ -0,0 +1,134 @@
# https://spin.atomicobject.com/2016/08/26/makefile-c-projects/ was a great
# base for this Makefile
# Project layout and default flags live in config.mk
-include config.mk

# Static library produced by the default goal
LIB := $(BUILD_DIR)/$(LIB_FILENAME)

# Source discovery. `!=` runs each command once, at parse time. Every search
# root comes from config.mk and is quoted the same way.
SRCS != find '$(SRC_DIR)' -iname '*.c'
SRCS_H != find '$(PUB_INC_DIR)' -iname '*.h'
SRCS_H_INTERNAL != find '$(SRC_DIR)' -iname '*.h'
SRCS_TEST != find '$(TEST_DIR)' -iname '*.c'
SRCS_EXAMPLE != find '$(EXAMPLE_DIR)' -iname '*.c'

# Every source file `dir/foo.c` is compiled to `$(BUILD_DIR)/dir/foo.c.o`
OBJS := $(SRCS:%=$(BUILD_DIR)/%.o)
OBJS_TEST := $(SRCS_TEST:%=$(BUILD_DIR)/%.o)
OBJS_EXAMPLE := $(SRCS_EXAMPLE:%=$(BUILD_DIR)/%.o)

# Compiler-generated dependency files, one per object file. The example
# sources are listed as well, so that a header change also rebuilds the
# example objects.
DEPS := $(SRCS:%=$(BUILD_DIR)/%.d) \
        $(SRCS_TEST:%=$(BUILD_DIR)/%.d) \
        $(SRCS_EXAMPLE:%=$(BUILD_DIR)/%.d)

# Test and example binaries are named after their object file, minus `.c.o`
BINS_TEST := $(OBJS_TEST:%.c.o=%)
BINS_EXAMPLE := $(OBJS_EXAMPLE:%.c.o=%)

# Phony per-binary targets that run a test (optionally under valgrind) or name
# an example
TARGETS_TEST := $(BINS_TEST:%=test-%)
TARGETS_MEM_TEST := $(BINS_TEST:%=test-mem-%)
TARGETS_EXAMPLE := $(BINS_EXAMPLE:%=example-%)

# Flags used for every compilation. -MMD -MP are added here (and not only via
# the overridable CFLAGS) because the `-include $(DEPS)` at the bottom of this
# file relies on the .d files being generated, even for `make CFLAGS=...`.
_CFLAGS := $(addprefix -I,$(INC_DIRS)) -MMD -MP $(CFLAGS) -Wall -Wextra
# None of these targets correspond to a file on disk
.PHONY: all objs lib

# Default goal: build the static library
all: lib

# =====COMPILATION=====
# Compiles the library sources without archiving them; used by the CI to lint
objs: $(OBJS)

# Convenience alias for the static library file
lib: $(LIB)
# Archive all library objects into the static library. $(AR) is used instead
# of a hard-coded `ar` so that cross toolchains can override the archiver, and
# the output directory is created first so the rule also works when there are
# no objects to compile yet.
$(LIB): $(OBJS)
	mkdir -p $(@D)
	$(AR) -rcs $@ $^
# Compile a single library source file into its object file, creating the
# object's directory on demand.
$(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c
	mkdir -p $(@D)
	$(CC) $(_CFLAGS) -c -o $@ $<
# =====TESTING=====
.PHONY: test test-mem $(TARGETS_TEST) $(TARGETS_MEM_TEST)

# Run every test binary
test: $(TARGETS_TEST)

# Run every test binary under valgrind's memcheck
test-mem: $(TARGETS_MEM_TEST)

# `test-<binary>` builds (if needed) and executes one test binary
$(TARGETS_TEST): test-%: %
	./$<

# `test-mem-<binary>` does the same under valgrind; any memcheck error makes
# the run exit with status 1
$(TARGETS_MEM_TEST): test-mem-%: %
	valgrind --tool=memcheck --error-exitcode=1 --track-origins=yes --leak-check=full ./$<
# Build all test binaries without running them
.PHONY: build-test
build-test: $(BINS_TEST)

# Link each test binary from its own object file and the static library.
# LDFLAGS and LDLIBS are honoured so that link-time options (e.g. sanitizers
# or extra libraries) can be supplied from the command line; both are empty by
# default, which yields the same link command as before.
$(BINS_TEST): %: %.c.o $(LIB)
	$(CC) $(LDFLAGS) $^ $(LDLIBS) -o $@
# Test sources are compiled with two extra include paths on top of the common
# flags: $(TEST_DIR) itself (which contains the acutest.h header file) and
# $(SRC_DIR)/_include, which holds the internal headers. The latter lets tests
# reach internal functions that aren't publicly exposed.
$(BUILD_DIR)/$(TEST_DIR)/%.c.o: $(TEST_DIR)/%.c
	mkdir -p $(@D)
	$(CC) $(_CFLAGS) -I$(TEST_DIR) -I$(SRC_DIR)/_include -c $< -o $@
# =====EXAMPLES=====
# Build all example binaries
.PHONY: build-example
build-example: $(BINS_EXAMPLE)

# Link each example binary from its own object file and the static library.
# LDFLAGS and LDLIBS are honoured so that link-time options can be supplied
# from the command line; both are empty by default, which yields the same link
# command as before.
$(BINS_EXAMPLE): %: %.c.o $(LIB)
	$(CC) $(LDFLAGS) $^ $(LDLIBS) -o $@
# Compile a single example source file; the public include directory is added
# explicitly to the common flags.
$(BUILD_DIR)/$(EXAMPLE_DIR)/%.c.o: $(EXAMPLE_DIR)/%.c
	mkdir -p $(@D)
	$(CC) $(_CFLAGS) -I$(PUB_INC_DIR) -c -o $@ $<
# =====MAINTENANCE=====
# Files handed to clang-format: library sources, public headers and internal
# headers, minus anything listed in THIRDPARTY. Expanded lazily, when a recipe
# uses it.
format_files = $(filter-out $(THIRDPARTY),$(SRCS)) \
               $(filter-out $(THIRDPARTY),$(SRCS_H)) \
               $(filter-out $(THIRDPARTY),$(SRCS_H_INTERNAL))

.PHONY: lint fmt

# Dry run: report formatting violations as errors without touching any file
lint:
	clang-format -n --Werror $(format_files)

# Reformat the files in place
fmt:
	clang-format -i $(format_files)
# Static analysis of the (non-third-party) library sources with cppcheck. Its
# build directory lives inside $(BUILD_DIR), and any reported issue makes the
# command exit with status 1.
.PHONY: check
check:
	mkdir -p $(BUILD_DIR)/cppcheck
	cppcheck $(addprefix -I,$(INC_DIRS)) \
		--cppcheck-build-dir=$(BUILD_DIR)/cppcheck \
		--error-exitcode=1 --enable=warning,style --inline-suppr \
		--check-level=exhaustive --quiet -j$(shell nproc) \
		$(filter-out $(THIRDPARTY),$(SRCS))
# Remove the whole build directory
.PHONY: clean
clean:
	$(RM) -r '$(BUILD_DIR)'
# Regenerate compile_commands.json by recording a full rebuild of the library,
# the tests and the examples with bear. The sub-makes are invoked through
# $(MAKE) rather than a literal `make`, so the same make binary is used and
# command-line variables and flags are passed on to them.
.PHONY: bear
bear: clean
	bear -- $(MAKE)
	bear --append -- $(MAKE) build-test
	bear --append -- $(MAKE) build-example
# Make make aware of the .d files listed in DEPS, so that header changes
# trigger recompilation. The leading `-` keeps make quiet about .d files that
# do not exist yet (e.g. on the first build).
-include $(DEPS)

17
config.mk 100644
View File

@ -0,0 +1,17 @@
# File name of the static library, created inside BUILD_DIR
LIB_FILENAME = libmrk.a

# Directory layout
BUILD_DIR = build
SRC_DIR = src
TEST_DIR = test
EXAMPLE_DIR = example

# Files excluded from formatting and static analysis (none at the moment)
THIRDPARTY =

# Public headers, plus the internal headers that live inside the source tree.
# The internal path is derived from SRC_DIR so that the two cannot drift apart.
PUB_INC_DIR = include
INC_DIRS = $(PUB_INC_DIR) $(SRC_DIR)/_include

# -MMD: generate a .d file for every source file. This file can be imported by
# make and makes make aware that a header file has been changed, ensuring an
# object file is also recompiled if only a header is changed.
# -MP: generate a dummy target for every header file (according to the docs it
# prevents some errors when removing header files)
CFLAGS ?= -MMD -MP -g

View File

@ -0,0 +1,19 @@
#ifndef MRK_COMMON
#define MRK_COMMON
#include <stdlib.h>
/**
 * Allocate a zero-initialized array of `n` elements of `size` bytes each and
 * store the resulting pointer in `*out`.
 *
 * If the allocation fails, the *enclosing function* returns
 * `mrk_err_failed_alloc`; `*out` is left untouched in that case.
 *
 * The body is wrapped in `do { ... } while (0)` so the macro expands to a
 * single statement and must be followed by a semicolon, which keeps it safe
 * inside an unbraced `if`/`else`. The arguments are parenthesized and the
 * local uses a prefixed name so that it cannot shadow an identifier appearing
 * in `out`.
 */
#define MRK_CALLOC(out, n, size)                                               \
  do {                                                                         \
    void *mrk_calloc_tmp_ = calloc((n), (size));                               \
    if (mrk_calloc_tmp_ == NULL)                                               \
      return mrk_err_failed_alloc;                                             \
    *(out) = mrk_calloc_tmp_;                                                  \
  } while (0)
/**
 * General error codes shared across the library.
 */
typedef enum mrk_err {
// No error occurred
mrk_err_ok = 0,
// A memory allocation failed (returned by code using MRK_CALLOC)
mrk_err_failed_alloc,
} mrk_err;
#endif

View File

@ -0,0 +1,51 @@
#ifndef MRK_LEXER
#define MRK_LEXER
#include <stdbool.h>
#include "mrk/common.h"
/**
 * Lexer handle. Only this forward declaration is part of this header; the
 * struct's fields are not declared here.
 */
typedef struct mrk_lexer mrk_lexer;
/**
 * Result codes returned by the lexer.
 */
typedef enum mrk_lexer_err {
// The call succeeded
mrk_lexer_err_ok = 0,
// The lexer has reached the end of its input
mrk_lexer_err_done,
// NOTE(review): presumably signals a character the lexer cannot handle;
// confirm against the implementation
mrk_lexer_err_unexpected_char,
} mrk_lexer_err;
/**
 * Kinds of tokens the lexer can produce.
 */
typedef enum mrk_token_type {
// A single `#` character
mrk_token_type_pound = 0,
} mrk_token_type;
/**
 * A lexed token. It does not hold any text itself, only its type and a
 * start/end pair describing where it sits in the lexed buffer.
 */
typedef struct mrk_token {
// Kind of token
mrk_token_type type;
// Intended to be the buffer index of the token's first character
size_t start;
// Intended to be the buffer index just past the token's last character
// (TODO confirm against the lexer implementation)
size_t end;
} mrk_token;
/**
 * Initialize a new lexer struct.
 *
 * A zero-initialized lexer is allocated and its address is stored in `*out`.
 *
 * @param out where to store the pointer to the new lexer
 * @return `mrk_err_ok` on success, or `mrk_err_failed_alloc` if the allocation
 * failed
 */
mrk_err mrk_lexer_init(mrk_lexer **out);
/**
 * Open the buffer with the given lexer struct. `buf` is expected to live for
 * the duration of the lexing; the lexer only stores the pointer and does not
 * copy the data.
 *
 * The lexer will run either until `len` characters have been matched, or until
 * a nul character has been reached. If `len` is set to 0, only the nul check is
 * used to determine the end of the buffer.
 *
 * @param lexer lexer to (re)start on the given buffer
 * @param buf buffer to lex
 * @param len number of characters to lex at most, or 0 to rely on the nul
 * terminator alone
 */
void mrk_lexer_open(mrk_lexer *lexer, const char *buf, size_t len);
/**
 * Returns whether the lexer is done.
 *
 * This is the case when a non-zero `len` was given to `mrk_lexer_open` and that
 * many characters have been consumed, or when the character at the current
 * position is nul.
 *
 * @param lexer lexer to inspect; it is not modified
 * @return true if there is nothing left to lex
 */
bool mrk_lexer_at_end(const mrk_lexer *lexer);
/**
 * Output the next lexed token for the given input.
 *
 * `out` is only written when a token is recognized; at present the only
 * recognized character is `#`, which produces a `mrk_token_type_pound` token.
 *
 * @param out where to store the lexed token
 * @param lexer lexer to read from
 * @return `mrk_lexer_err_done` if the lexer was already at the end of its
 * input; callers should not read `*out` in that case
 */
mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer);
#endif

View File

@ -0,0 +1,40 @@
#ifndef MRK_LEXER_INTERNAL
#define MRK_LEXER_INTERNAL
#include "mrk/lexer.h"
/**
 * Internal state of a lexer.
 */
struct mrk_lexer {
// The buffer being lexed, as handed to mrk_lexer_open
struct {
// Start of the buffer; not owned by the lexer
const char *s;
// Maximum number of characters to lex, or 0 to stop at the nul terminator
size_t len;
} buf;
// Current position within the buffer
struct {
// Line counter
size_t line;
// Position within the current line
size_t line_index;
// Index into buf.s of the next character to consume
size_t buf_index;
} pos;
// Extent of the token currently being matched
struct {
// Value of `end` at the time the previous token was emitted
size_t start;
// Incremented for every character consumed by mrk_lexer_advance
size_t end;
} token;
};
/**
 * Return the next character that would be consumed by a call to advance,
 * without consuming it. At the end of the buffer, this value is nul.
 *
 * @param lexer lexer to peek into
 * @return the upcoming character, or nul at the end of the buffer
 */
char mrk_lexer_peek(mrk_lexer *lexer);
/**
 * Advance the current position by one character, adding the new character to
 * the current token's context and returning it.
 *
 * At the end of the buffer, nothing is consumed and nul is returned.
 *
 * @param lexer lexer to advance
 * @return the consumed character, or nul at the end of the buffer
 */
char mrk_lexer_advance(mrk_lexer *lexer);
/**
 * Output the currently matched token to the token struct with the given type,
 * and reset the lexer's tracked token so that the next token starts where this
 * one ended.
 *
 * @param out token struct to fill in
 * @param lexer lexer whose tracked token is emitted
 * @param type type to assign to the emitted token
 */
void mrk_lexer_emit(mrk_token *out, mrk_lexer *lexer, mrk_token_type type);
#endif

73
src/lexer/lexer.c 100644
View File

@ -0,0 +1,73 @@
#include "mrk/lexer_internal.h"
/**
 * Allocate a zero-initialized lexer and hand it back through `out`.
 *
 * Returns `mrk_err_failed_alloc` (leaving `*out` untouched) if the allocation
 * fails, and `mrk_err_ok` otherwise.
 */
mrk_err mrk_lexer_init(mrk_lexer **out) {
  void *lexer = calloc(1, sizeof(mrk_lexer));

  if (lexer == NULL) {
    return mrk_err_failed_alloc;
  }

  *out = lexer;

  return mrk_err_ok;
}
/**
 * Point the lexer at a new buffer and rewind all of its state.
 *
 * The buffer is not copied; only the pointer and length are stored. Every
 * position counter is reset, including `line_index`, so that a lexer that has
 * already been used can be opened again on another buffer without carrying
 * over a stale position.
 */
void mrk_lexer_open(mrk_lexer *lexer, const char *buf, size_t len) {
  lexer->buf.s = buf;
  lexer->buf.len = len;

  lexer->pos.line = 0;
  lexer->pos.line_index = 0;
  lexer->pos.buf_index = 0;

  lexer->token.start = 0;
  lexer->token.end = 0;
}
/**
 * Report whether there is nothing left to lex.
 *
 * The length limit is only taken into account when it is non-zero; it is
 * checked before the buffer is read, so the character at index `len` is never
 * accessed in that case. Otherwise the lexer is done once the current
 * character is nul.
 */
bool mrk_lexer_at_end(const mrk_lexer *lexer) {
  const size_t index = lexer->pos.buf_index;

  if (lexer->buf.len > 0 && index == lexer->buf.len) {
    return true;
  }

  return lexer->buf.s[index] == '\0';
}
/**
 * Consume one character: move the position forward, extend the tracked token
 * by one character and return the consumed character.
 *
 * At the end of the buffer nothing is consumed and nul is returned.
 */
char mrk_lexer_advance(mrk_lexer *lexer) {
  if (mrk_lexer_at_end(lexer)) {
    return '\0';
  }

  const char c = lexer->buf.s[lexer->pos.buf_index];

  // A newline is still part of the line it terminates, so the line counter
  // only moves on once the newline itself has been consumed. (Comparing
  // against nul here could never match, as the end-of-buffer check above has
  // already ruled that character out.)
  if (c == '\n') {
    lexer->pos.line++;
    lexer->pos.line_index = 0;
  } else {
    lexer->pos.line_index++;
  }

  lexer->pos.buf_index++;
  lexer->token.end++;

  return c;
}
/**
 * Look at the character the next call to advance would consume, without
 * consuming it. Yields nul once the lexer is at the end of its buffer.
 */
char mrk_lexer_peek(mrk_lexer *lexer) {
  return mrk_lexer_at_end(lexer) ? '\0' : lexer->buf.s[lexer->pos.buf_index];
}
/**
 * Write the currently tracked token to `out` with the given type, then start
 * tracking a new, empty token right after it.
 */
void mrk_lexer_emit(mrk_token *out, mrk_lexer *lexer, mrk_token_type type) {
  out->type = type;
  out->start = lexer->token.start;
  // The end position belongs in `end`; assigning it to `start` a second time
  // would overwrite the real start and leave `end` unset
  out->end = lexer->token.end;

  // The next token begins where this one stopped
  lexer->token.start = lexer->token.end;
}
/**
 * Lex the next token from the input and write it to `out`.
 *
 * Returns `mrk_lexer_err_done` if the lexer is already at the end of its
 * input, `mrk_lexer_err_unexpected_char` if the consumed character does not
 * start any known token, and `mrk_lexer_err_ok` if a token was written to
 * `out`. `out` is only written in the latter case.
 */
mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) {
  if (mrk_lexer_at_end(lexer)) {
    return mrk_lexer_err_done;
  }

  switch (mrk_lexer_advance(lexer)) {
  case '#':
    mrk_lexer_emit(out, lexer, mrk_token_type_pound);
    break;
  default:
    // No token was emitted, so reporting success here would make the caller
    // read a token that was never written
    return mrk_lexer_err_unexpected_char;
  }

  return mrk_lexer_err_ok;
}