From d235bd000087a84a87bc14f35809d1247e1fe6a3 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Tue, 19 Mar 2024 14:39:54 +0100 Subject: [PATCH] feat(parser): lay groundwork for proper error handling --- include/mrk/common.h | 3 +-- include/mrk/lexer.h | 2 ++ include/mrk/parser.h | 17 +++++++++++++ src/_include/mrk/lexer_internal.h | 8 ++----- src/_include/mrk/parser_internal.h | 7 ++++-- src/lexer/lexer.c | 38 +++++++++++++++++------------- src/parser/err.c | 29 +++++++++++++++++++++++ src/parser/parser.c | 9 ++++--- src/parser/util.c | 5 ++-- 9 files changed, 87 insertions(+), 31 deletions(-) create mode 100644 src/parser/err.c diff --git a/include/mrk/common.h b/include/mrk/common.h index b75eff7..ca7ce34 100644 --- a/include/mrk/common.h +++ b/include/mrk/common.h @@ -38,8 +38,7 @@ typedef enum mrk_err { mrk_err_ok = 0, mrk_err_failed_alloc, - mrk_err_unexpected_token, - mrk_err_invalid, + mrk_err_invalid_md, } mrk_err; #endif diff --git a/include/mrk/lexer.h b/include/mrk/lexer.h index 9655ced..bf1067f 100644 --- a/include/mrk/lexer.h +++ b/include/mrk/lexer.h @@ -48,6 +48,8 @@ typedef struct mrk_token { mrk_token_type type; size_t start; size_t end; + size_t start_line; + size_t start_line_index; } mrk_token; /** diff --git a/include/mrk/parser.h b/include/mrk/parser.h index 8bd0c33..7674145 100644 --- a/include/mrk/parser.h +++ b/include/mrk/parser.h @@ -9,6 +9,12 @@ typedef struct mrk_parser mrk_parser; +typedef enum mrk_parser_err { + mrk_parser_err_ok = 0, + mrk_parser_err_unexpected_token, + mrk_parser_unclosed_brackets, +} mrk_parser_err; + /** * Initialize a new parser struct. */ @@ -18,4 +24,15 @@ void mrk_parser_open(mrk_parser *parser, mrk_lexer *lexer); mrk_err mrk_parser_parse(mrk_ast_node **out, mrk_parser *parser); +/** + * Return the error code of the last parser method that returned `invalid_md`. 
+ */ +mrk_parser_err mrk_parser_err_code(mrk_parser *parser); + +/** + * Return a nul-terminated string containing a textual description of the last + * error. The string is managed internally and should not be freed by the caller. + */ +const char *mrk_parser_err_msg(mrk_parser *parser); + #endif diff --git a/src/_include/mrk/lexer_internal.h b/src/_include/mrk/lexer_internal.h index d2c9bff..683e1ac 100644 --- a/src/_include/mrk/lexer_internal.h +++ b/src/_include/mrk/lexer_internal.h @@ -13,12 +13,8 @@ struct mrk_lexer { size_t line_index; size_t buf_index; } pos; - struct { - size_t start; - size_t end; - bool emitted; - } token; - mrk_token_type last_emitted; + mrk_token cur_token; + mrk_token last_emitted; }; /** diff --git a/src/_include/mrk/parser_internal.h b/src/_include/mrk/parser_internal.h index 450babd..9a3c6f1 100644 --- a/src/_include/mrk/parser_internal.h +++ b/src/_include/mrk/parser_internal.h @@ -4,7 +4,8 @@ #include "mrk/lexer.h" #include "mrk/parser.h" -#define MRK_PARSER_LOOKAHEAD_BUF_SIZE 4 +#define MRK_PARSER_ERRMSG_BUF 256 + #define MRK_PARSE_ERR(p, t, m) \ p->error.token = t; \ p->error.msg = m; \ @@ -16,8 +17,10 @@ struct mrk_parser { // Indentation of the current line size_t indent; struct { + mrk_parser_err code; mrk_token token; - const char *msg; + mrk_token_type expected_token_type; + char buf[MRK_PARSER_ERRMSG_BUF]; } error; }; diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index d7aba98..92046ac 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -22,12 +22,18 @@ void mrk_lexer_free(mrk_lexer *lexer) { void mrk_lexer_open(mrk_lexer *lexer, const char *buf, size_t len) { lexer->buf.s = buf; lexer->buf.len = len; + lexer->pos.line = 0; + lexer->pos.line_index = 0; lexer->pos.buf_index = 0; - lexer->token.start = 0; - lexer->token.end = 0; - lexer->token.emitted = false; - lexer->last_emitted = mrk_token_type_none; + + lexer->cur_token.type = mrk_token_type_none; + lexer->cur_token.start = 0; + lexer->cur_token.end = 0; + 
lexer->cur_token.start_line = 0; + lexer->cur_token.start_line_index = 0; + + lexer->last_emitted = lexer->cur_token; } bool mrk_lexer_done(const mrk_lexer *lexer) { @@ -52,7 +58,7 @@ char mrk_lexer_advance(mrk_lexer *lexer) { } lexer->pos.buf_index++; - lexer->token.end++; + lexer->cur_token.end++; return c; } @@ -118,18 +124,17 @@ char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n) { } void mrk_lexer_reset(mrk_lexer *lexer) { - lexer->token.start = lexer->pos.buf_index; - lexer->token.end = lexer->pos.buf_index; - lexer->token.emitted = false; + lexer->cur_token.start = lexer->pos.buf_index; + lexer->cur_token.end = lexer->pos.buf_index; + lexer->cur_token.start_line = lexer->pos.line; + lexer->cur_token.start_line_index = lexer->pos.line_index; } void mrk_lexer_emit(mrk_token *out, mrk_lexer *lexer, mrk_token_type type) { - out->type = type; - out->start = lexer->token.start; - out->end = lexer->token.end; + lexer->cur_token.type = type; + *out = lexer->cur_token; - lexer->token.emitted = true; - lexer->last_emitted = type; + lexer->last_emitted = lexer->cur_token; } void mrk_lexer_advance_text(mrk_lexer *lexer) { @@ -152,7 +157,7 @@ void mrk_lexer_lex_start_of_line(mrk_token *out, mrk_lexer *lexer) { case '#': mrk_lexer_advance_eq(lexer, c); - if (lexer->token.end - lexer->token.start <= MRK_MAX_HEADER_LEN) { + if (lexer->cur_token.end - lexer->cur_token.start <= MRK_MAX_HEADER_LEN) { mrk_lexer_emit(out, lexer, mrk_token_type_header_start); } else { mrk_lexer_advance_text(lexer); @@ -166,7 +171,8 @@ void mrk_lexer_lex_start_of_line(mrk_token *out, mrk_lexer *lexer) { } else { mrk_lexer_advance_eq(lexer, c); - if (lexer->token.end - lexer->token.start >= MRK_MIN_HORIZ_RULE_LEN && + if (lexer->cur_token.end - lexer->cur_token.start >= + MRK_MIN_HORIZ_RULE_LEN && mrk_lexer_peek(lexer) == '\n') { mrk_lexer_emit(out, lexer, mrk_token_type_horizontal_rule); } else { @@ -372,7 +378,7 @@ mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) { 
mrk_lexer_reset(lexer); if (lexer->pos.line_index == 0 || - lexer->last_emitted == mrk_token_type_indent) { + lexer->last_emitted.type == mrk_token_type_indent) { mrk_lexer_lex_start_of_line(out, lexer); } else { mrk_lexer_lex_middle_of_line(out, lexer); diff --git a/src/parser/err.c b/src/parser/err.c new file mode 100644 index 0000000..77aaeca --- /dev/null +++ b/src/parser/err.c @@ -0,0 +1,29 @@ +#include <stdio.h> + +#include "mrk/parser_internal.h" + +mrk_parser_err mrk_parser_err_code(mrk_parser *parser) { + return parser->error.code; +} + +const char *mrk_parser_err_msg(mrk_parser *parser) { + switch (parser->error.code) { + case mrk_parser_err_ok: + parser->error.buf[0] = '\0'; + break; + case mrk_parser_err_unexpected_token: + sprintf(parser->error.buf, + "%lu:%lu: unexpected token, expected type %i but got %i", + parser->error.token.start_line, + parser->error.token.start_line_index, + parser->error.expected_token_type, parser->error.token.type); + break; + case mrk_parser_unclosed_brackets: + sprintf(parser->error.buf, "%lu:%lu: unclosed bracket", + parser->error.token.start_line, + parser->error.token.start_line_index); + break; + } + + return parser->error.buf; +} diff --git a/src/parser/parser.c b/src/parser/parser.c index d1a4ddb..0ddfda0 100644 --- a/src/parser/parser.c +++ b/src/parser/parser.c @@ -120,11 +120,14 @@ mrk_err mrk_parser_parse_common(mrk_parser *parser, mrk_ast_node *parent) { mrk_err mrk_parser_parse_link(mrk_ast_node *out, mrk_parser *parser) { out->type = mrk_ast_node_type_link; - MRK_RES(mrk_parser_eat(NULL, parser, mrk_token_type_left_bracket)); + mrk_token left_bracket; + MRK_RES(mrk_parser_eat(&left_bracket, parser, mrk_token_type_left_bracket)); if (mrk_parser_done(parser)) { - parser->error.msg = "Unclosed brackets"; - return mrk_err_invalid; + parser->error.code = mrk_parser_unclosed_brackets; + parser->error.token = left_bracket; + + return mrk_err_invalid_md; } mrk_ast_node *child; diff --git a/src/parser/util.c b/src/parser/util.c 
index 35ac83f..a2df712 100644 --- a/src/parser/util.c +++ b/src/parser/util.c @@ -22,10 +22,11 @@ void mrk_parser_advance(mrk_parser *parser) { mrk_err mrk_parser_eat(mrk_token *out, mrk_parser *parser, mrk_token_type type) { if (mrk_parser_done(parser) || mrk_parser_peek(parser).type != type) { - parser->error.msg = "Unexpected token"; + parser->error.code = mrk_parser_err_unexpected_token; parser->error.token = mrk_parser_peek(parser); + parser->error.expected_token_type = type; - return mrk_err_unexpected_token; + return mrk_err_invalid_md; } if (out != NULL) {