feat: started project and lexer
commit
a6c17eff5f
|
@ -0,0 +1,5 @@
|
||||||
|
root = true
|
||||||
|
|
||||||
|
[*.{c,cpp,h}]
|
||||||
|
indent_style = space
|
||||||
|
indent_size = 2
|
|
@ -0,0 +1,5 @@
|
||||||
|
build/
|
||||||
|
.cache/
|
||||||
|
compile_commands.json
|
||||||
|
.cache/
|
||||||
|
vgcore.*
|
|
@ -0,0 +1,134 @@
|
||||||
|
# https://spin.atomicobject.com/2016/08/26/makefile-c-projects/ was a great
|
||||||
|
# base for this Makefile
|
||||||
|
|
||||||
|
-include config.mk
|
||||||
|
|
||||||
|
# Path of the static library produced by the build.
LIB := $(BUILD_DIR)/$(LIB_FILENAME)

# Source discovery. Each `!=` runs its find command and stores the output.
# Paths are quoted so a configured directory containing shell metacharacters
# is handed to find as a single argument. Public headers are looked up through
# $(PUB_INC_DIR) so that changing it in the configuration is honoured here too.
SRCS != find '$(SRC_DIR)' -iname '*.c'
SRCS_H != find '$(PUB_INC_DIR)' -iname '*.h'
SRCS_H_INTERNAL != find '$(SRC_DIR)' -iname '*.h'
SRCS_TEST != find '$(TEST_DIR)' -iname '*.c'
SRCS_EXAMPLE != find '$(EXAMPLE_DIR)' -iname '*.c'

# Every object lives under $(BUILD_DIR) at the path of its source file with
# `.o` appended, e.g. src/foo.c -> $(BUILD_DIR)/src/foo.c.o
OBJS := $(SRCS:%=$(BUILD_DIR)/%.o)
OBJS_TEST := $(SRCS_TEST:%=$(BUILD_DIR)/%.o)
OBJS_EXAMPLE := $(SRCS_EXAMPLE:%=$(BUILD_DIR)/%.o)

# Dependency files for ALL objects (library, tests and examples), so a header
# change rebuilds whichever object includes it.
DEPS := $(OBJS:%.o=%.d) $(OBJS_TEST:%.o=%.d) $(OBJS_EXAMPLE:%.o=%.d)

# One binary per test / example source: the object path minus `.c.o`.
BINS_TEST := $(OBJS_TEST:%.c.o=%)
BINS_EXAMPLE := $(OBJS_EXAMPLE:%.c.o=%)

# Phony per-binary targets, named by prefixing the binary path.
TARGETS_TEST := $(BINS_TEST:%=test-%)
TARGETS_MEM_TEST := $(BINS_TEST:%=test-mem-%)
TARGETS_EXAMPLE := $(BINS_EXAMPLE:%=example-%)

# Flags actually passed to the compiler: include paths, the configurable
# CFLAGS, and the warnings that are always enabled.
_CFLAGS := $(addprefix -I,$(INC_DIRS)) $(CFLAGS) -Wall -Wextra
|
||||||
|
|
||||||
|
# First rule in this Makefile, so a plain `make` builds the library.
all: lib
.PHONY: all
|
||||||
|
|
||||||
|
|
||||||
|
# =====COMPILATION=====
|
||||||
|
# Builds every library object without archiving them; the CI uses this target
# for linting.
objs: $(OBJS)
.PHONY: objs
|
||||||
|
|
||||||
|
# `lib` is the convenient name for building the static library file.
.PHONY: lib
lib: $(LIB)

# Archive all library objects. $(AR) is make's standard archiver variable
# (default `ar`), which lets users substitute e.g. a cross toolchain archiver.
# The directory is created explicitly so the rule also works when there are no
# objects whose rules would have created it.
$(LIB): $(OBJS)
	mkdir -p $(@D)
	$(AR) -rcs $@ $^
|
||||||
|
|
||||||
|
# Compile one library source file into its object under $(BUILD_DIR), creating
# the output directory first.
$(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c
	mkdir -p $(@D)
	$(CC) $(_CFLAGS) -c -o $@ $<
|
||||||
|
|
||||||
|
# =====TESTING=====
|
||||||
|
# Aggregate targets: run every test binary, plainly (`test`) or through the
# per-binary memory-check targets (`test-mem`).
.PHONY: test test-mem
test: $(TARGETS_TEST)
test-mem: $(TARGETS_MEM_TEST)
|
||||||
|
|
||||||
|
# test-<binary>: build the test binary (the sole prerequisite) and execute it.
.PHONY: $(TARGETS_TEST)
$(TARGETS_TEST): test-%: %
	./$<
|
||||||
|
|
||||||
|
# test-mem-<binary>: run the test binary under valgrind's memcheck tool.
# --error-exitcode=1 turns any reported error into a failing recipe.
.PHONY: $(TARGETS_MEM_TEST)
$(TARGETS_MEM_TEST): test-mem-%: %
	valgrind --tool=memcheck --error-exitcode=1 --track-origins=yes --leak-check=full ./$<
|
||||||
|
|
||||||
|
# Build all test binaries without running them.
.PHONY: build-test
build-test: $(BINS_TEST)

# Link each test binary from its own object plus the static library.
$(BINS_TEST): %: %.c.o $(LIB)
	$(CC) $^ -o $@
|
||||||
|
|
||||||
|
# Test sources get two extra include paths on top of $(_CFLAGS): $(TEST_DIR),
# which holds the acutest.h header, and the internal header directory of the
# sources under test. The latter lets tests call internal functions that are
# not part of the public interface.
$(BUILD_DIR)/$(TEST_DIR)/%.c.o: $(TEST_DIR)/%.c
	mkdir -p $(@D)
	$(CC) $(_CFLAGS) -I$(TEST_DIR) -I$(SRC_DIR)/_include -c $< -o $@
|
||||||
|
|
||||||
|
# =====EXAMPLES=====
|
||||||
|
# Build all example binaries.
.PHONY: build-example
build-example: $(BINS_EXAMPLE)

# Link each example binary from its own object plus the static library.
$(BINS_EXAMPLE): %: %.c.o $(LIB)
	$(CC) $^ -o $@
|
||||||
|
|
||||||
|
# Compile one example source. The resulting binaries link against the built
# library; here $(PUB_INC_DIR) is passed explicitly in addition to $(_CFLAGS).
$(BUILD_DIR)/$(EXAMPLE_DIR)/%.c.o: $(EXAMPLE_DIR)/%.c
	mkdir -p $(@D)
	$(CC) $(_CFLAGS) -I$(PUB_INC_DIR) -c -o $@ $<
|
||||||
|
|
||||||
|
# =====MAINTENANCE=====
|
||||||
|
# Dry-run clang-format (-n) over all sources and headers except those listed in
# $(THIRDPARTY); --Werror makes formatting differences fail the target.
.PHONY: lint
lint:
	clang-format -n --Werror \
		$(filter-out $(THIRDPARTY),$(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL))
|
||||||
|
|
||||||
|
# Reformat, in place (-i), all sources and headers except those listed in
# $(THIRDPARTY).
.PHONY: fmt
fmt:
	clang-format -i \
		$(filter-out $(THIRDPARTY),$(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL))
|
||||||
|
|
||||||
|
# Static analysis of the library sources (minus $(THIRDPARTY)) with cppcheck.
# Analysis state is kept in $(BUILD_DIR)/cppcheck, one job is started per CPU
# reported by nproc, and any finding makes the target fail.
.PHONY: check
check:
	mkdir -p $(BUILD_DIR)/cppcheck
	cppcheck --quiet --error-exitcode=1 --enable=warning,style \
		--inline-suppr --check-level=exhaustive -j$(shell nproc) \
		--cppcheck-build-dir=$(BUILD_DIR)/cppcheck \
		$(addprefix -I,$(INC_DIRS)) \
		$(filter-out $(THIRDPARTY),$(SRCS))
|
||||||
|
|
||||||
|
# Remove the whole build directory. $(RM) is make's predefined `rm -f`.
.PHONY: clean
clean:
	$(RM) -r '$(BUILD_DIR)'
|
||||||
|
|
||||||
|
|
||||||
|
# Regenerate the compilation database by wrapping full builds of the library,
# the tests and the examples in `bear`. A clean build is forced first so every
# compiler invocation is actually executed and recorded.
#
# The nested builds use $(MAKE) rather than a literal `make`, so the same make
# program is used and command-line flags/variables are passed on to it.
# Note: because these lines reference $(MAKE), make also executes them under
# `make -n`.
.PHONY: bear
bear: clean
	bear -- $(MAKE)
	bear --append -- $(MAKE) build-test
	bear --append -- $(MAKE) build-example
|
||||||
|
|
||||||
|
|
||||||
|
# Make make aware of the .d files
|
||||||
|
-include $(DEPS)
|
|
@ -0,0 +1,17 @@
|
||||||
|
# File name of the static library, created inside $(BUILD_DIR).
LIB_FILENAME = libmrk.a

# Directory layout.
BUILD_DIR = build
SRC_DIR = src
TEST_DIR = test
EXAMPLE_DIR = example

# Files to exclude from formatting and static analysis (none by default).
THIRDPARTY =

# Public headers, and the full include path used when compiling. The internal
# header directory is derived from $(SRC_DIR) so the two cannot drift apart.
PUB_INC_DIR = include
INC_DIRS = $(PUB_INC_DIR) $(SRC_DIR)/_include

# Default user-tunable flags; can be replaced from the environment or the
# command line.
CFLAGS ?= -g

# -MMD: generate a .d file for every source file. This file can be imported by
# make and makes make aware that a header file has been changed, ensuring an
# object file is also recompiled if only a header is changed.
# -MP: generate a dummy target for every header file (according to the docs it
# prevents some errors when removing header files)
#
# These flags are required for correct incremental builds, so they are appended
# with `override`: they stay in effect even when CFLAGS is supplied by the user.
override CFLAGS += -MMD -MP
|
|
@ -0,0 +1,19 @@
|
||||||
|
#ifndef MRK_COMMON
|
||||||
|
#define MRK_COMMON
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/**
 * Allocate a zero-initialised array of `n` elements of `size` bytes each and
 * store the resulting pointer in `*out`.
 *
 * On allocation failure the macro executes `return mrk_err_failed_alloc;` in
 * the calling function, so it may only be used inside functions that return
 * `mrk_err`. `*out` is left untouched in that case.
 *
 * The body is wrapped in `do { ... } while (0)` so the macro behaves as a
 * single statement (for example in an unbraced if/else). Arguments are
 * parenthesised, and the temporary has a reserved-looking name so it cannot
 * shadow a variable used in the arguments.
 */
#define MRK_CALLOC(out, n, size)                                               \
  do {                                                                         \
    void *mrk_calloc_tmp_ = calloc((n), (size));                               \
    if (mrk_calloc_tmp_ == NULL)                                               \
      return mrk_err_failed_alloc;                                             \
    *(out) = mrk_calloc_tmp_;                                                  \
  } while (0)

/**
 * General error codes shared across the library.
 */
typedef enum mrk_err {
  /* No error occurred. */
  mrk_err_ok = 0,
  /* A memory allocation failed (see MRK_CALLOC). */
  mrk_err_failed_alloc,
} mrk_err;
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,51 @@
|
||||||
|
#ifndef MRK_LEXER
|
||||||
|
#define MRK_LEXER
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
|
#include "mrk/common.h"
|
||||||
|
|
||||||
|
typedef struct mrk_lexer mrk_lexer;
|
||||||
|
|
||||||
|
/**
 * Status codes reported by the lexer.
 */
typedef enum mrk_lexer_err {
  /* No error. */
  mrk_lexer_err_ok = 0,
  /* Returned by mrk_lexer_next once the lexer has reached the end of its
   * input. */
  mrk_lexer_err_done = 1,
  /* Presumably meant for input that starts no known token.
   * NOTE(review): confirm where this is reported. */
  mrk_lexer_err_unexpected_char = 2,
} mrk_lexer_err;
|
||||||
|
|
||||||
|
/**
 * Kinds of token the lexer can produce.
 */
typedef enum mrk_token_type {
  /* A single '#' character. */
  mrk_token_type_pound = 0,
} mrk_token_type;
|
||||||
|
|
||||||
|
typedef struct mrk_token {
|
||||||
|
mrk_token_type type;
|
||||||
|
size_t start;
|
||||||
|
size_t end;
|
||||||
|
} mrk_token;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize a new lexer struct.
|
||||||
|
*/
|
||||||
|
mrk_err mrk_lexer_init(mrk_lexer **out);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open the buffer with the given lexer struct. `buf` is expected to live for
|
||||||
|
* the duration of the lexing.
|
||||||
|
*
|
||||||
|
* The lexer will run either until `len` characters have been matched, or until
|
||||||
|
* a nul character has been reached. If `len` is set to 0, only the nul check is
|
||||||
|
* used to determine the end of the buffer.
|
||||||
|
*/
|
||||||
|
void mrk_lexer_open(mrk_lexer *lexer, const char *buf, size_t len);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether the lexer is done.
|
||||||
|
*/
|
||||||
|
bool mrk_lexer_at_end(const mrk_lexer *lexer);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Output the next lexed token for the given input.
|
||||||
|
*/
|
||||||
|
mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer);
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,40 @@
|
||||||
|
#ifndef MRK_LEXER_INTERNAL
|
||||||
|
#define MRK_LEXER_INTERNAL
|
||||||
|
|
||||||
|
#include "mrk/lexer.h"
|
||||||
|
|
||||||
|
/**
 * Internal state of a lexer.
 */
struct mrk_lexer {
  /* Input being lexed, as handed to mrk_lexer_open. The lexer only keeps the
   * pointer; `len` is the caller-provided length (0 = rely on the terminating
   * nul only). */
  struct {
    const char *s;
    size_t len;
  } buf;

  /* Current read position: line number, index within that line, and index
   * into `buf.s` of the next character to consume. */
  struct {
    size_t line;
    size_t line_index;
    size_t buf_index;
  } pos;

  /* Extent of the token currently being matched: `end` grows by one for each
   * consumed character, and `start` catches up with `end` when a token is
   * emitted. */
  struct {
    size_t start;
    size_t end;
  } token;
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the next character that would be consumed by a call to advance. At the
|
||||||
|
* end of the buffer, this value is nul.
|
||||||
|
*/
|
||||||
|
char mrk_lexer_peek(mrk_lexer *lexer);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Advance the current position by one character, adding the new character to
|
||||||
|
* the current token's context and returning it.
|
||||||
|
*/
|
||||||
|
char mrk_lexer_advance(mrk_lexer *lexer);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Output the currently matched token to the token struct with the given type,
|
||||||
|
* and reset the lexer's tracked token.
|
||||||
|
*/
|
||||||
|
void mrk_lexer_emit(mrk_token *out, mrk_lexer *lexer, mrk_token_type type);
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,73 @@
|
||||||
|
#include "mrk/lexer_internal.h"
|
||||||
|
|
||||||
|
/*
 * Allocate a zero-initialised lexer and store it in `*out`.
 *
 * MRK_CALLOC returns `mrk_err_failed_alloc` from this function when the
 * allocation fails; otherwise `mrk_err_ok` is returned.
 */
mrk_err mrk_lexer_init(mrk_lexer **out) {
  MRK_CALLOC(out, 1, sizeof(**out));
  return mrk_err_ok;
}
|
||||||
|
|
||||||
|
/*
 * Point the lexer at a new input buffer and reset all position and token
 * tracking to the start of that buffer.
 *
 * Only the pointer is stored; the buffer is not copied. Every position field,
 * including `pos.line_index`, is reset so that a lexer which is opened a
 * second time does not carry over the column of the previous input.
 */
void mrk_lexer_open(mrk_lexer *lexer, const char *buf, size_t len) {
  lexer->buf.s = buf;
  lexer->buf.len = len;

  lexer->pos.line = 0;
  lexer->pos.line_index = 0;
  lexer->pos.buf_index = 0;

  lexer->token.start = 0;
  lexer->token.end = 0;
}
|
||||||
|
|
||||||
|
bool mrk_lexer_at_end(const mrk_lexer *lexer) {
|
||||||
|
return (lexer->buf.len > 0 && lexer->pos.buf_index == lexer->buf.len) ||
|
||||||
|
(lexer->buf.s[lexer->pos.buf_index] == '\0');
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Consume one character: return it, move the read position forward and extend
 * the token currently being matched by one character.
 *
 * At the end of the input nothing is consumed and nul is returned.
 */
char mrk_lexer_advance(mrk_lexer *lexer) {
  if (mrk_lexer_at_end(lexer)) {
    return '\0';
  }

  char c = lexer->buf.s[lexer->pos.buf_index];

  // A newline is still part of the line it terminates, so the line counter
  // only moves on once the newline itself has been consumed. (Comparing
  // against nul here could never match: nul is rejected by the guard above.)
  if (c == '\n') {
    lexer->pos.line++;
    lexer->pos.line_index = 0;
  } else {
    lexer->pos.line_index++;
  }

  lexer->pos.buf_index++;
  lexer->token.end++;

  return c;
}
|
||||||
|
|
||||||
|
/*
 * Look at the character the next call to mrk_lexer_advance would consume,
 * without consuming it. Yields nul at the end of the input.
 */
char mrk_lexer_peek(mrk_lexer *lexer) {
  return mrk_lexer_at_end(lexer) ? '\0' : lexer->buf.s[lexer->pos.buf_index];
}
|
||||||
|
|
||||||
|
/*
 * Write the token currently being matched to `out` with the given type, then
 * start tracking a new, empty token at the current position.
 *
 * `out->start` and `out->end` receive the lexer's tracked token bounds. (The
 * end bound must go to `out->end`; writing it to `out->start` would both lose
 * the real start and leave `end` unset.)
 */
void mrk_lexer_emit(mrk_token *out, mrk_lexer *lexer, mrk_token_type type) {
  out->type = type;
  out->start = lexer->token.start;
  out->end = lexer->token.end;

  // The next token begins where this one ended.
  lexer->token.start = lexer->token.end;
}
|
||||||
|
|
||||||
|
/*
 * Lex the next token from the input into `out`.
 *
 * Returns:
 *  - mrk_lexer_err_done when the input is exhausted (`out` is not written);
 *  - mrk_lexer_err_ok when a token was written to `out`;
 *  - mrk_lexer_err_unexpected_char when the consumed character starts no
 *    known token (`out` is not written).
 */
mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) {
  if (mrk_lexer_at_end(lexer)) {
    return mrk_lexer_err_done;
  }

  switch (mrk_lexer_advance(lexer)) {
  case '#':
    mrk_lexer_emit(out, lexer, mrk_token_type_pound);
    return mrk_lexer_err_ok;

  default:
    // Report the problem instead of claiming success with `out` unwritten.
    // The offending character is dropped from the tracked token so that it
    // does not become part of whatever token is matched next.
    lexer->token.start = lexer->token.end;
    return mrk_lexer_err_unexpected_char;
  }
}
|
Loading…
Reference in New Issue