feat(parser): lay groundwork for proper error handling
parent
f65942697e
commit
d235bd0000
|
@ -38,8 +38,7 @@
|
||||||
typedef enum mrk_err {
|
typedef enum mrk_err {
|
||||||
mrk_err_ok = 0,
|
mrk_err_ok = 0,
|
||||||
mrk_err_failed_alloc,
|
mrk_err_failed_alloc,
|
||||||
mrk_err_unexpected_token,
|
mrk_err_invalid_md,
|
||||||
mrk_err_invalid,
|
|
||||||
} mrk_err;
|
} mrk_err;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -48,6 +48,8 @@ typedef struct mrk_token {
|
||||||
mrk_token_type type;
|
mrk_token_type type;
|
||||||
size_t start;
|
size_t start;
|
||||||
size_t end;
|
size_t end;
|
||||||
|
size_t start_line;
|
||||||
|
size_t start_line_index;
|
||||||
} mrk_token;
|
} mrk_token;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -9,6 +9,12 @@
|
||||||
|
|
||||||
typedef struct mrk_parser mrk_parser;
|
typedef struct mrk_parser mrk_parser;
|
||||||
|
|
||||||
|
typedef enum mrk_parser_err { // Parser-specific error codes, as reported by mrk_parser_err_code().
|
||||||
|
mrk_parser_err_ok = 0, // No error; mrk_parser_err_msg() produces an empty string for this code.
|
||||||
|
mrk_parser_err_unexpected_token, // Set by mrk_parser_eat() when the parser is done or the next token is not of the requested type.
|
||||||
|
mrk_parser_unclosed_brackets, // Set by mrk_parser_parse_link() when the parser is done right after the opening bracket.
|
||||||
|
} mrk_parser_err;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize a new parser struct.
|
* Initialize a new parser struct.
|
||||||
*/
|
*/
|
||||||
|
@ -18,4 +24,15 @@ void mrk_parser_open(mrk_parser *parser, mrk_lexer *lexer);
|
||||||
|
|
||||||
mrk_err mrk_parser_parse(mrk_ast_node **out, mrk_parser *parser);
|
mrk_err mrk_parser_parse(mrk_ast_node **out, mrk_parser *parser);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the error code of the last parser method that returned `invalid_md`.
|
||||||
|
*/
|
||||||
|
mrk_parser_err mrk_parser_err_code(mrk_parser *parser);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return a nul-terminated string containing a textual description of the last
|
||||||
|
* error. The string is managed internally and should not be freed by the caller.
|
||||||
|
*/
|
||||||
|
const char *mrk_parser_err_msg(mrk_parser *parser);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -13,12 +13,8 @@ struct mrk_lexer {
|
||||||
size_t line_index;
|
size_t line_index;
|
||||||
size_t buf_index;
|
size_t buf_index;
|
||||||
} pos;
|
} pos;
|
||||||
struct {
|
mrk_token cur_token;
|
||||||
size_t start;
|
mrk_token last_emitted;
|
||||||
size_t end;
|
|
||||||
bool emitted;
|
|
||||||
} token;
|
|
||||||
mrk_token_type last_emitted;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -4,7 +4,8 @@
|
||||||
#include "mrk/lexer.h"
|
#include "mrk/lexer.h"
|
||||||
#include "mrk/parser.h"
|
#include "mrk/parser.h"
|
||||||
|
|
||||||
#define MRK_PARSER_LOOKAHEAD_BUF_SIZE 4
|
#define MRK_PARSER_ERRMSG_BUF 256
|
||||||
|
|
||||||
#define MRK_PARSE_ERR(p, t, m) \
|
#define MRK_PARSE_ERR(p, t, m) \
|
||||||
p->error.token = t; \
|
p->error.token = t; \
|
||||||
p->error.msg = m; \
|
p->error.msg = m; \
|
||||||
|
@ -16,8 +17,10 @@ struct mrk_parser {
|
||||||
// Indentation of the current line
|
// Indentation of the current line
|
||||||
size_t indent;
|
size_t indent;
|
||||||
struct {
|
struct {
|
||||||
|
mrk_parser_err code;
|
||||||
mrk_token token;
|
mrk_token token;
|
||||||
const char *msg;
|
mrk_token_type expected_token_type;
|
||||||
|
char buf[MRK_PARSER_ERRMSG_BUF];
|
||||||
} error;
|
} error;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -22,12 +22,18 @@ void mrk_lexer_free(mrk_lexer *lexer) {
|
||||||
void mrk_lexer_open(mrk_lexer *lexer, const char *buf, size_t len) {
|
void mrk_lexer_open(mrk_lexer *lexer, const char *buf, size_t len) {
|
||||||
lexer->buf.s = buf;
|
lexer->buf.s = buf;
|
||||||
lexer->buf.len = len;
|
lexer->buf.len = len;
|
||||||
|
|
||||||
lexer->pos.line = 0;
|
lexer->pos.line = 0;
|
||||||
|
lexer->pos.line_index = 0;
|
||||||
lexer->pos.buf_index = 0;
|
lexer->pos.buf_index = 0;
|
||||||
lexer->token.start = 0;
|
|
||||||
lexer->token.end = 0;
|
lexer->cur_token.type = mrk_token_type_none;
|
||||||
lexer->token.emitted = false;
|
lexer->cur_token.start = 0;
|
||||||
lexer->last_emitted = mrk_token_type_none;
|
lexer->cur_token.end = 0;
|
||||||
|
lexer->cur_token.start_line = 0;
|
||||||
|
lexer->cur_token.start_line_index = 0;
|
||||||
|
|
||||||
|
lexer->last_emitted = lexer->cur_token;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool mrk_lexer_done(const mrk_lexer *lexer) {
|
bool mrk_lexer_done(const mrk_lexer *lexer) {
|
||||||
|
@ -52,7 +58,7 @@ char mrk_lexer_advance(mrk_lexer *lexer) {
|
||||||
}
|
}
|
||||||
|
|
||||||
lexer->pos.buf_index++;
|
lexer->pos.buf_index++;
|
||||||
lexer->token.end++;
|
lexer->cur_token.end++;
|
||||||
|
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
@ -118,18 +124,17 @@ char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void mrk_lexer_reset(mrk_lexer *lexer) {
|
void mrk_lexer_reset(mrk_lexer *lexer) {
|
||||||
lexer->token.start = lexer->pos.buf_index;
|
lexer->cur_token.start = lexer->pos.buf_index;
|
||||||
lexer->token.end = lexer->pos.buf_index;
|
lexer->cur_token.end = lexer->pos.buf_index;
|
||||||
lexer->token.emitted = false;
|
lexer->cur_token.start_line = lexer->pos.line;
|
||||||
|
lexer->cur_token.start_line_index = lexer->pos.line_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mrk_lexer_emit(mrk_token *out, mrk_lexer *lexer, mrk_token_type type) {
|
void mrk_lexer_emit(mrk_token *out, mrk_lexer *lexer, mrk_token_type type) {
|
||||||
out->type = type;
|
lexer->cur_token.type = type;
|
||||||
out->start = lexer->token.start;
|
*out = lexer->cur_token;
|
||||||
out->end = lexer->token.end;
|
|
||||||
|
|
||||||
lexer->token.emitted = true;
|
lexer->last_emitted = lexer->cur_token;
|
||||||
lexer->last_emitted = type;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void mrk_lexer_advance_text(mrk_lexer *lexer) {
|
void mrk_lexer_advance_text(mrk_lexer *lexer) {
|
||||||
|
@ -152,7 +157,7 @@ void mrk_lexer_lex_start_of_line(mrk_token *out, mrk_lexer *lexer) {
|
||||||
case '#':
|
case '#':
|
||||||
mrk_lexer_advance_eq(lexer, c);
|
mrk_lexer_advance_eq(lexer, c);
|
||||||
|
|
||||||
if (lexer->token.end - lexer->token.start <= MRK_MAX_HEADER_LEN) {
|
if (lexer->cur_token.end - lexer->cur_token.start <= MRK_MAX_HEADER_LEN) {
|
||||||
mrk_lexer_emit(out, lexer, mrk_token_type_header_start);
|
mrk_lexer_emit(out, lexer, mrk_token_type_header_start);
|
||||||
} else {
|
} else {
|
||||||
mrk_lexer_advance_text(lexer);
|
mrk_lexer_advance_text(lexer);
|
||||||
|
@ -166,7 +171,8 @@ void mrk_lexer_lex_start_of_line(mrk_token *out, mrk_lexer *lexer) {
|
||||||
} else {
|
} else {
|
||||||
mrk_lexer_advance_eq(lexer, c);
|
mrk_lexer_advance_eq(lexer, c);
|
||||||
|
|
||||||
if (lexer->token.end - lexer->token.start >= MRK_MIN_HORIZ_RULE_LEN &&
|
if (lexer->cur_token.end - lexer->cur_token.start >=
|
||||||
|
MRK_MIN_HORIZ_RULE_LEN &&
|
||||||
mrk_lexer_peek(lexer) == '\n') {
|
mrk_lexer_peek(lexer) == '\n') {
|
||||||
mrk_lexer_emit(out, lexer, mrk_token_type_horizontal_rule);
|
mrk_lexer_emit(out, lexer, mrk_token_type_horizontal_rule);
|
||||||
} else {
|
} else {
|
||||||
|
@ -372,7 +378,7 @@ mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) {
|
||||||
mrk_lexer_reset(lexer);
|
mrk_lexer_reset(lexer);
|
||||||
|
|
||||||
if (lexer->pos.line_index == 0 ||
|
if (lexer->pos.line_index == 0 ||
|
||||||
lexer->last_emitted == mrk_token_type_indent) {
|
lexer->last_emitted.type == mrk_token_type_indent) {
|
||||||
mrk_lexer_lex_start_of_line(out, lexer);
|
mrk_lexer_lex_start_of_line(out, lexer);
|
||||||
} else {
|
} else {
|
||||||
mrk_lexer_lex_middle_of_line(out, lexer);
|
mrk_lexer_lex_middle_of_line(out, lexer);
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "mrk/parser_internal.h"
|
||||||
|
|
||||||
|
/**
 * Read back the parser-specific error code stored in the parser's error
 * state.
 *
 * @param parser parser whose `error.code` field is read
 * @return the current value of `parser->error.code`
 */
mrk_parser_err mrk_parser_err_code(mrk_parser *parser) {
    const mrk_parser_err code = parser->error.code;

    return code;
}
|
||||||
|
|
||||||
|
/**
 * Format a textual description of the error currently recorded on the parser.
 *
 * The message is written into the parser's own `error.buf`, so the returned
 * pointer must not be freed by the caller and its contents are overwritten by
 * the next call on the same parser.
 *
 * @param parser parser whose `error` state (code, token, expected token type)
 *               is formatted
 * @return pointer to `parser->error.buf`, holding a nul-terminated message;
 *         the empty string when the code is `mrk_parser_err_ok`
 */
const char *mrk_parser_err_msg(mrk_parser *parser) {
    switch (parser->error.code) {
    case mrk_parser_err_ok:
        parser->error.buf[0] = '\0';
        break;
    case mrk_parser_err_unexpected_token:
        // snprintf keeps the write inside error.buf; %zu is the conversion
        // for the size_t position fields, and the enum values are cast to
        // int to match %i exactly.
        snprintf(parser->error.buf, sizeof parser->error.buf,
                 "%zu:%zu: unexpected token, expected type %i but got %i",
                 parser->error.token.start_line,
                 parser->error.token.start_line_index,
                 (int)parser->error.expected_token_type,
                 (int)parser->error.token.type);
        break;
    case mrk_parser_unclosed_brackets:
        snprintf(parser->error.buf, sizeof parser->error.buf,
                 "%zu:%zu: unclosed bracket",
                 parser->error.token.start_line,
                 parser->error.token.start_line_index);
        break;
    default:
        // Unknown code: write something explicit rather than handing back
        // whatever message a previous call left in the buffer.
        snprintf(parser->error.buf, sizeof parser->error.buf,
                 "unknown parser error %i", (int)parser->error.code);
        break;
    }

    return parser->error.buf;
}
|
|
@ -120,11 +120,14 @@ mrk_err mrk_parser_parse_common(mrk_parser *parser, mrk_ast_node *parent) {
|
||||||
mrk_err mrk_parser_parse_link(mrk_ast_node *out, mrk_parser *parser) {
|
mrk_err mrk_parser_parse_link(mrk_ast_node *out, mrk_parser *parser) {
|
||||||
out->type = mrk_ast_node_type_link;
|
out->type = mrk_ast_node_type_link;
|
||||||
|
|
||||||
MRK_RES(mrk_parser_eat(NULL, parser, mrk_token_type_left_bracket));
|
mrk_token left_bracket;
|
||||||
|
MRK_RES(mrk_parser_eat(&left_bracket, parser, mrk_token_type_left_bracket));
|
||||||
|
|
||||||
if (mrk_parser_done(parser)) {
|
if (mrk_parser_done(parser)) {
|
||||||
parser->error.msg = "Unclosed brackets";
|
parser->error.code = mrk_parser_unclosed_brackets;
|
||||||
return mrk_err_invalid;
|
parser->error.token = left_bracket;
|
||||||
|
|
||||||
|
return mrk_err_invalid_md;
|
||||||
}
|
}
|
||||||
|
|
||||||
mrk_ast_node *child;
|
mrk_ast_node *child;
|
||||||
|
|
|
@ -22,10 +22,11 @@ void mrk_parser_advance(mrk_parser *parser) {
|
||||||
mrk_err mrk_parser_eat(mrk_token *out, mrk_parser *parser,
|
mrk_err mrk_parser_eat(mrk_token *out, mrk_parser *parser,
|
||||||
mrk_token_type type) {
|
mrk_token_type type) {
|
||||||
if (mrk_parser_done(parser) || mrk_parser_peek(parser).type != type) {
|
if (mrk_parser_done(parser) || mrk_parser_peek(parser).type != type) {
|
||||||
parser->error.msg = "Unexpected token";
|
parser->error.code = mrk_parser_err_unexpected_token;
|
||||||
parser->error.token = mrk_parser_peek(parser);
|
parser->error.token = mrk_parser_peek(parser);
|
||||||
|
parser->error.expected_token_type = type;
|
||||||
|
|
||||||
return mrk_err_unexpected_token;
|
return mrk_err_invalid_md;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (out != NULL) {
|
if (out != NULL) {
|
||||||
|
|
Loading…
Reference in New Issue