From 35aae48a7ff038c20b499f065b59744e78f65d5d Mon Sep 17 00:00:00 2001
From: Chewing_Bever
Date: Thu, 7 Mar 2024 14:57:01 +0100
Subject: [PATCH] feat(parser): lay groundworks for parser

---
 include/mrk/ast.h                  | 44 ++++++++++++++++
 include/mrk/common.h               | 24 +++++++++
 include/mrk/lexer.h                |  3 +-
 include/mrk/parser.h               | 32 ++++++++++++
 src/_include/mrk/parser_internal.h | 43 ++++++++++++++++
 src/ast.c                          | 28 +++++++++++
 src/parser/parser.c                | 81 ++++++++++++++++++++++++++++++
 src/parser/util.c                  | 44 ++++++++++++++++
 8 files changed, 298 insertions(+), 1 deletion(-)
 create mode 100644 include/mrk/ast.h
 create mode 100644 include/mrk/parser.h
 create mode 100644 src/_include/mrk/parser_internal.h
 create mode 100644 src/ast.c
 create mode 100644 src/parser/parser.c
 create mode 100644 src/parser/util.c

diff --git a/include/mrk/ast.h b/include/mrk/ast.h
new file mode 100644
index 0000000..0ca008f
--- /dev/null
+++ b/include/mrk/ast.h
@@ -0,0 +1,44 @@
+#ifndef MRK_AST
+#define MRK_AST
+
+#include <stdlib.h>
+
+#include "mrk/common.h"
+
+/** Number of slots in the args array of every AST node. */
+#define MRK_AST_NODE_ARGS 1
+
+typedef enum mrk_ast_node_type {
+  mrk_ast_node_type_header = 0,
+} mrk_ast_node_type;
+
+typedef struct mrk_ast_node {
+  // Child nodes, stored as an array of len pointers
+  struct {
+    struct mrk_ast_node **arr;
+    size_t len;
+  } children;
+  mrk_ast_node_type type;
+  void *args[MRK_AST_NODE_ARGS];
+} mrk_ast_node;
+
+/**
+ * Initialize a new AST node struct.
+ *
+ * @param out where to store the pointer to the new node
+ */
+mrk_err mrk_ast_node_init(mrk_ast_node **out);
+
+/**
+ * Append a child to the end of a node's list of children.
+ *
+ * The child pointer itself is stored; the child is not copied.
+ *
+ * @param parent node whose children array is grown by one element
+ * @param child node to store in the new last slot
+ * @return mrk_err_ok on success, or mrk_err_failed_alloc if the children array
+ * could not be grown, in which case parent is left unchanged
+ */
+mrk_err mrk_ast_node_child_append(mrk_ast_node *parent, mrk_ast_node *child);
+
+#endif
diff --git a/include/mrk/common.h b/include/mrk/common.h
index 54163c5..f621949 100644
--- a/include/mrk/common.h
+++ b/include/mrk/common.h
@@ -11,6 +11,30 @@
     *out = temp; \
   }
 
+/**
+ * Evaluate x, an expression of type mrk_err, and return its value from the
+ * enclosing function if it is not mrk_err_ok.
+ */
+#define MRK_RES(x) \
+  do { \
+    mrk_err res = (x); \
+    if (res != mrk_err_ok) \
+      return res; \
+  } while (0)
+
+/**
+ * Same as MRK_RES, but runs the statement e right before returning the error,
+ * so the enclosing function can release resources first.
+ */
+#define MRK_RES2(x, e) \
+  do { \
+    mrk_err res = (x); \
+    if (res != mrk_err_ok) { \
+      e; \
+      return res; \
+    } \
+  } while (0)
+
 typedef enum mrk_err {
   mrk_err_ok = 0,
   mrk_err_failed_alloc,
diff --git a/include/mrk/lexer.h b/include/mrk/lexer.h
index 9d44088..49c7af9 100644
--- a/include/mrk/lexer.h
+++ b/include/mrk/lexer.h
@@ -14,7 +14,8 @@ typedef enum mrk_lexer_err {
 } mrk_lexer_err;
 
 typedef enum mrk_token_type {
-  mrk_token_type_pounds = 0,
+  mrk_token_type_none = 0,
+  mrk_token_type_pounds,
   mrk_token_type_backticks,
   mrk_token_type_dashes,
   mrk_token_type_underscores,
diff --git a/include/mrk/parser.h b/include/mrk/parser.h
new file mode 100644
index 0000000..ace979c
--- /dev/null
+++ b/include/mrk/parser.h
@@ -0,0 +1,32 @@
+#ifndef MRK_PARSER
+#define MRK_PARSER
+
+#include "mrk/ast.h"
+#include "mrk/common.h"
+#include "mrk/lexer.h"
+
+typedef struct mrk_parser mrk_parser;
+
+/**
+ * Initialize a new parser struct.
+ */
+mrk_err mrk_parser_init(mrk_parser **out);
+
+/**
+ * Attach a lexer to the parser and prefill the lookahead buffer from it.
+ *
+ * @param parser parser to (re)start
+ * @param lexer lexer the parser reads its tokens from
+ */
+void mrk_parser_open(mrk_parser *parser, mrk_lexer *lexer);
+
+/**
+ * Parse all remaining input into a tree of AST nodes.
+ *
+ * @param out set to the root node on success; not written on failure
+ * @param parser parser that has been opened on a lexer
+ * @return mrk_err_ok on success, otherwise the first error that occurred
+ */
+mrk_err mrk_parser_parse(mrk_ast_node **out, mrk_parser *parser);
+
+#endif
diff --git a/src/_include/mrk/parser_internal.h b/src/_include/mrk/parser_internal.h
new file mode 100644
index 0000000..9a567c1
--- /dev/null
+++ b/src/_include/mrk/parser_internal.h
@@ -0,0 +1,43 @@
+#ifndef MRK_PARSER_INTERNAL
+#define MRK_PARSER_INTERNAL
+
+#include "mrk/lexer_internal.h"
+#include "mrk/parser.h"
+
+/** Number of tokens the parser keeps buffered ahead of the lexer. */
+#define MRK_PARSER_LOOKAHEAD_BUF_SIZE 4
+
+struct mrk_parser {
+  mrk_lexer *lexer;
+  // Ring buffer of tokens read from the lexer; buf[index] is the token
+  // returned by peek. A slot of type mrk_token_type_none holds no token.
+  struct {
+    mrk_token buf[MRK_PARSER_LOOKAHEAD_BUF_SIZE];
+    size_t index;
+  } lookahead;
+};
+
+/**
+ * Returns true if all the data from the lexer has been processed.
+ *
+ * Because the parser might buffer parts of the results of the lexer, this is
+ * not equivalent to calling lexer done.
+ */
+bool mrk_parser_done(mrk_parser *parser);
+
+/**
+ * Return the next token that would be consumed by a call to advance.
+ */
+mrk_token mrk_parser_peek(mrk_parser *parser);
+
+/**
+ * Advance the processing stream by one token.
+ */
+void mrk_parser_advance(mrk_parser *parser);
+
+/**
+ * Parse a single block and store the root of its subtree in out.
+ */
+mrk_err mrk_parser_parse_block(mrk_ast_node **out, mrk_parser *parser);
+
+#endif
diff --git a/src/ast.c b/src/ast.c
new file mode 100644
index 0000000..e4701c2
--- /dev/null
+++ b/src/ast.c
@@ -0,0 +1,28 @@
+#include "mrk/ast.h"
+
+mrk_err mrk_ast_node_init(mrk_ast_node **out) {
+  MRK_CALLOC(out, 1, sizeof(mrk_ast_node));
+
+  return mrk_err_ok;
+}
+
+mrk_err mrk_ast_node_child_append(mrk_ast_node *parent, mrk_ast_node *child) {
+  // The first child needs a fresh array; later ones grow the existing array
+  // by one slot
+  mrk_ast_node **children =
+      parent->children.len == 0
+          ? malloc(sizeof *children)
+          : realloc(parent->children.arr,
+                    sizeof *children * (parent->children.len + 1));
+
+  // When growing fails the old array stays valid and owned by parent
+  if (children == NULL) {
+    return mrk_err_failed_alloc;
+  }
+
+  children[parent->children.len] = child;
+  parent->children.arr = children;
+  parent->children.len++;
+
+  return mrk_err_ok;
+}
diff --git a/src/parser/parser.c b/src/parser/parser.c
new file mode 100644
index 0000000..671db05
--- /dev/null
+++ b/src/parser/parser.c
@@ -0,0 +1,81 @@
+#include "mrk/parser_internal.h"
+
+#include <stdlib.h>
+
+/**
+ * Release a node together with every node reachable through its children.
+ *
+ * NOTE(review): assumes every node was created by mrk_ast_node_init and that
+ * MRK_CALLOC allocates with calloc, making free the matching release; confirm
+ * against common.h. The args of a node are not released.
+ */
+static void mrk_parser_tree_free(mrk_ast_node *node) {
+  if (node == NULL) {
+    return;
+  }
+
+  for (size_t i = 0; i < node->children.len; i++) {
+    mrk_parser_tree_free(node->children.arr[i]);
+  }
+
+  free(node->children.arr);
+  free(node);
+}
+
+void mrk_parser_open(mrk_parser *parser, mrk_lexer *lexer) {
+  parser->lexer = lexer;
+  // Start reading at the first slot, also when a parser is opened again
+  parser->lookahead.index = 0;
+
+  // Prefill lookahead buffer. Slots the lexer cannot fill are marked empty, so
+  // a token left over from an earlier run is never mistaken for input.
+  for (size_t i = 0; i < MRK_PARSER_LOOKAHEAD_BUF_SIZE; i++) {
+    if (mrk_lexer_done(lexer)) {
+      parser->lookahead.buf[i].type = mrk_token_type_none;
+    } else {
+      mrk_lexer_next(&parser->lookahead.buf[i], lexer);
+    }
+  }
+}
+
+mrk_err mrk_parser_parse(mrk_ast_node **out, mrk_parser *parser) {
+  mrk_ast_node *root;
+  MRK_RES(mrk_ast_node_init(&root));
+
+  // Tokens can remain in the lookahead buffer after the lexer has run dry, so
+  // ask the parser, not the lexer, whether there is input left
+  while (!mrk_parser_done(parser)) {
+    mrk_ast_node *block_root = NULL;
+
+    // On failure, release the partial tree instead of leaking it
+    MRK_RES2(mrk_parser_parse_block(&block_root, parser),
+             mrk_parser_tree_free(root));
+
+    mrk_err res = mrk_ast_node_child_append(root, block_root);
+
+    if (res != mrk_err_ok) {
+      // The block is not reachable from root yet, so release it separately
+      mrk_parser_tree_free(block_root);
+      mrk_parser_tree_free(root);
+      return res;
+    }
+  }
+
+  *out = root;
+
+  return mrk_err_ok;
+}
+
+/**
+ * Placeholder until block parsing is implemented.
+ *
+ * Creates a fresh node and consumes one token, so that the function returns a
+ * defined value and the loop in mrk_parser_parse always makes progress.
+ */
+mrk_err mrk_parser_parse_block(mrk_ast_node **out, mrk_parser *parser) {
+  // TODO parse the tokens of the block into the node
+  MRK_RES(mrk_ast_node_init(out));
+  mrk_parser_advance(parser);
+
+  return mrk_err_ok;
+}
diff --git a/src/parser/util.c b/src/parser/util.c
new file mode 100644
index 0000000..53d9392
--- /dev/null
+++ b/src/parser/util.c
@@ -0,0 +1,44 @@
+#include "mrk/parser_internal.h"
+
+bool mrk_parser_done(mrk_parser *parser) {
+  // Input is exhausted only once the lexer has nothing left and the slot at
+  // the read position holds no buffered token either
+  return mrk_lexer_done(parser->lexer) &&
+         parser->lookahead.buf[parser->lookahead.index].type ==
+             mrk_token_type_none;
+}
+
+mrk_token mrk_parser_peek(mrk_parser *parser) {
+  // Zero the token so that every field has a defined value when no input is
+  // left and only the type is set
+  mrk_token out = {0};
+
+  if (mrk_parser_done(parser)) {
+    out.type = mrk_token_type_none;
+  } else {
+    out = parser->lookahead.buf[parser->lookahead.index];
+  }
+
+  return out;
+}
+
+void mrk_parser_advance(mrk_parser *parser) {
+  if (mrk_parser_done(parser)) {
+    return;
+  }
+
+  mrk_token *consumed = &parser->lookahead.buf[parser->lookahead.index];
+
+  if (!mrk_lexer_done(parser->lexer)) {
+    // Reuse the slot of the consumed token for the next token of the lexer
+    mrk_lexer_next(consumed, parser->lexer);
+  } else {
+    // Nothing left to refill the slot with: mark it empty. Otherwise the
+    // consumed token would be served again once the index wraps around, and
+    // mrk_parser_done could keep reporting false.
+    consumed->type = mrk_token_type_none;
+  }
+
+  parser->lookahead.index =
+      (parser->lookahead.index + 1) % MRK_PARSER_LOOKAHEAD_BUF_SIZE;
+}