feat: lex and parse single-line code blocks
parent
2387461e21
commit
c653d437bd
|
@ -15,6 +15,7 @@ typedef enum mrk_ast_node_type {
|
|||
mrk_ast_node_type_paragraph,
|
||||
mrk_ast_node_type_list,
|
||||
mrk_ast_node_type_list_item,
|
||||
mrk_ast_node_type_code,
|
||||
} mrk_ast_node_type;
|
||||
|
||||
typedef struct mrk_ast_node {
|
||||
|
|
|
@ -13,8 +13,9 @@ typedef enum mrk_parser_err {
|
|||
mrk_parser_err_ok = 0,
|
||||
mrk_parser_err_unexpected_eat,
|
||||
mrk_parser_err_unexpected_token,
|
||||
mrk_parser_unclosed_brackets,
|
||||
mrk_parser_unexpected_path,
|
||||
mrk_parser_err_unclosed_brackets,
|
||||
mrk_parser_err_unclosed_backticks,
|
||||
mrk_parser_err_unexpected_path,
|
||||
} mrk_parser_err;
|
||||
|
||||
/**
|
||||
|
|
|
@ -92,4 +92,9 @@ mrk_err mrk_parser_parse_list(mrk_ast_node *out, mrk_parser *parser);
|
|||
|
||||
mrk_err mrk_parser_parse_list_item(mrk_ast_node *out, mrk_parser *parser);
|
||||
|
||||
/**
|
||||
* Parse a single-line code segment.
|
||||
*/
|
||||
mrk_err mrk_parser_parse_code(mrk_ast_node *out, mrk_parser *parser);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -286,7 +286,6 @@ void mrk_lexer_lex_start_of_line(mrk_token *out, mrk_lexer *lexer) {
|
|||
mrk_lexer_advance_n(lexer, 2);
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_triple_backtick);
|
||||
} else {
|
||||
mrk_lexer_advance(lexer);
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_backtick);
|
||||
}
|
||||
break;
|
||||
|
@ -373,7 +372,6 @@ void mrk_lexer_lex_middle_of_line(mrk_token *out, mrk_lexer *lexer) {
|
|||
}
|
||||
} break;
|
||||
case '`':
|
||||
mrk_lexer_advance(lexer);
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_backtick);
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -32,12 +32,12 @@ const char *mrk_parser_err_msg(mrk_parser *parser) {
|
|||
mrk_token_type_names[parser->error.token.type],
|
||||
parser->error.token.type);
|
||||
break;
|
||||
case mrk_parser_unclosed_brackets:
|
||||
case mrk_parser_err_unclosed_brackets:
|
||||
sprintf(parser->error.buf, "%lu:%lu: unclosed bracket",
|
||||
parser->error.token.start_line + 1,
|
||||
parser->error.token.start_line_index + 1);
|
||||
break;
|
||||
case mrk_parser_unexpected_path:
|
||||
case mrk_parser_err_unexpected_path:
|
||||
return unexpected_path_msg;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
#include "mrk/parser.h"
|
||||
#include "mrk/lexer.h"
|
||||
#include "mrk/parser_internal.h"
|
||||
#include <stdio.h>
|
||||
|
||||
|
@ -93,7 +95,9 @@ mrk_err mrk_parser_parse_common(mrk_parser *parser, mrk_ast_node *parent) {
|
|||
while (!mrk_parser_done(parser) && parser->indent == indent) {
|
||||
mrk_ast_node *child;
|
||||
|
||||
switch (mrk_parser_peek(parser).type) {
|
||||
mrk_token token = mrk_parser_peek(parser);
|
||||
|
||||
switch (token.type) {
|
||||
case mrk_token_type_text:
|
||||
MRK_RES(mrk_ast_node_child_append(&child, parent));
|
||||
MRK_RES(mrk_parser_parse_text(child, parser));
|
||||
|
@ -102,6 +106,11 @@ mrk_err mrk_parser_parse_common(mrk_parser *parser, mrk_ast_node *parent) {
|
|||
MRK_RES(mrk_ast_node_child_append(&child, parent));
|
||||
MRK_RES(mrk_parser_parse_link(child, parser));
|
||||
break;
|
||||
case mrk_token_type_backtick:
|
||||
|
||||
MRK_RES(mrk_ast_node_child_append(&child, parent));
|
||||
MRK_RES(mrk_parser_parse_code(child, parser));
|
||||
break;
|
||||
case mrk_token_type_newline:
|
||||
MRK_RES(mrk_ast_node_child_append(&child, parent));
|
||||
child->type = mrk_ast_node_type_space;
|
||||
|
@ -112,6 +121,16 @@ mrk_err mrk_parser_parse_common(mrk_parser *parser, mrk_ast_node *parent) {
|
|||
mrk_parser_advance(parser);
|
||||
mrk_parser_indent(parser);
|
||||
return mrk_err_ok;
|
||||
// All these tokens have no special meaning in this context
|
||||
case mrk_token_type_left_paren:
|
||||
case mrk_token_type_right_paren:
|
||||
mrk_parser_advance(parser);
|
||||
|
||||
MRK_RES(mrk_ast_node_child_append(&child, parent));
|
||||
child->type = mrk_ast_node_type_text;
|
||||
child->d.text.start = token.start;
|
||||
child->d.text.end = token.end;
|
||||
break;
|
||||
// Any other tokens aren't part of the common section so we just exit
|
||||
default:
|
||||
return mrk_err_ok;
|
||||
|
@ -128,7 +147,7 @@ mrk_err mrk_parser_parse_link(mrk_ast_node *out, mrk_parser *parser) {
|
|||
MRK_RES(mrk_parser_eat(&left_bracket, parser, mrk_token_type_left_bracket));
|
||||
|
||||
if (mrk_parser_done(parser)) {
|
||||
parser->error.code = mrk_parser_unclosed_brackets;
|
||||
parser->error.code = mrk_parser_err_unclosed_brackets;
|
||||
parser->error.token = left_bracket;
|
||||
|
||||
return mrk_err_invalid_md;
|
||||
|
@ -185,7 +204,7 @@ mrk_err mrk_parser_parse_list(mrk_ast_node *out, mrk_parser *parser) {
|
|||
out->d.list.ordered = true;
|
||||
break;
|
||||
default:
|
||||
parser->error.code = mrk_parser_unexpected_path;
|
||||
parser->error.code = mrk_parser_err_unexpected_path;
|
||||
return mrk_err_invalid_md;
|
||||
}
|
||||
|
||||
|
@ -245,7 +264,7 @@ mrk_err mrk_parser_parse_list_item(mrk_ast_node *out, mrk_parser *parser) {
|
|||
break;
|
||||
// This path should never be taken
|
||||
default:
|
||||
parser->error.code = mrk_parser_unexpected_path;
|
||||
parser->error.code = mrk_parser_err_unexpected_path;
|
||||
return mrk_err_invalid_md;
|
||||
}
|
||||
|
||||
|
@ -262,3 +281,59 @@ mrk_err mrk_parser_parse_list_item(mrk_ast_node *out, mrk_parser *parser) {
|
|||
|
||||
return mrk_err_ok;
|
||||
}
|
||||
|
||||
mrk_err mrk_parser_parse_code(mrk_ast_node *out, mrk_parser *parser) {
|
||||
out->type = mrk_ast_node_type_code;
|
||||
|
||||
mrk_token start_backtick;
|
||||
MRK_RES(mrk_parser_eat(&start_backtick, parser, mrk_token_type_backtick));
|
||||
|
||||
size_t indent = parser->indent;
|
||||
|
||||
while (!mrk_parser_done(parser) && parser->indent == indent) {
|
||||
mrk_ast_node *child;
|
||||
mrk_token token = mrk_parser_peek(parser);
|
||||
|
||||
switch (token.type) {
|
||||
case mrk_token_type_blank_line:
|
||||
parser->error.code = mrk_parser_err_unclosed_brackets;
|
||||
parser->error.token = start_backtick;
|
||||
|
||||
return mrk_err_invalid_md;
|
||||
case mrk_token_type_backtick:
|
||||
mrk_parser_advance(parser);
|
||||
|
||||
return mrk_err_ok;
|
||||
case mrk_token_type_newline:
|
||||
MRK_RES(mrk_ast_node_child_append(&child, out));
|
||||
child->type = mrk_ast_node_type_space;
|
||||
mrk_parser_advance(parser);
|
||||
mrk_parser_indent(parser);
|
||||
break;
|
||||
default:
|
||||
mrk_parser_advance(parser);
|
||||
|
||||
if (out->children.len == 0 ||
|
||||
out->children.arr[out->children.len - 1]->type !=
|
||||
mrk_ast_node_type_text) {
|
||||
MRK_RES(mrk_ast_node_child_append(&child, out));
|
||||
child->type = mrk_ast_node_type_text;
|
||||
child->d.text.start = token.start;
|
||||
child->d.text.end = token.end;
|
||||
}
|
||||
// Simply append current text to previous one
|
||||
else {
|
||||
child = out->children.arr[out->children.len - 1];
|
||||
child->d.text.end = token.end;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Successful exit of the function should've already happened in the while
|
||||
// loop
|
||||
parser->error.code = mrk_parser_err_unclosed_brackets;
|
||||
parser->error.token = start_backtick;
|
||||
|
||||
return mrk_err_invalid_md;
|
||||
}
|
||||
|
|
|
@ -66,9 +66,25 @@ void test_lexer_simple1() {
|
|||
TEST_CHECK(t.type == mrk_token_type_text);
|
||||
}
|
||||
|
||||
void test_lexer_code() {
|
||||
LEXER_INIT();
|
||||
|
||||
const char *buf = "`world [hello](link)`";
|
||||
mrk_lexer_open(lxr, buf, 0);
|
||||
|
||||
mrk_token t;
|
||||
|
||||
TEST_CHECK(mrk_lexer_next(&t, lxr) == mrk_lexer_err_ok);
|
||||
TEST_CHECK(t.type == mrk_token_type_backtick);
|
||||
TEST_CHECK(t.start == 0);
|
||||
TEST_CHECK(t.end == 1);
|
||||
|
||||
}
|
||||
|
||||
TEST_LIST = {
|
||||
{ "lexer header", test_lexer_header },
|
||||
{ "lexer line break", test_lexer_line_break},
|
||||
{ "lexer simple 1", test_lexer_simple1 },
|
||||
{ "lexer code", test_lexer_code },
|
||||
{ NULL, NULL }
|
||||
};
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#include "mrk/ast.h"
|
||||
#include "test.h"
|
||||
|
||||
#include "mrk/lexer.h"
|
||||
|
@ -98,9 +99,30 @@ void test_parse_unordered_list() {
|
|||
TEST_CHECK(subchild->children.arr[0]->d.text.end == 57);
|
||||
}
|
||||
|
||||
void test_parse_code() {
|
||||
const char *buf = "`world [hello](link)`";
|
||||
PARSER_OPEN(buf);
|
||||
|
||||
mrk_ast_node *code;
|
||||
mrk_ast_node_init(&code);
|
||||
|
||||
TEST_CHECK(mrk_parser_parse_code(code, parser) == mrk_err_ok);
|
||||
|
||||
TEST_CHECK(code->type == mrk_ast_node_type_code);
|
||||
TEST_CHECK(code->children.len == 1);
|
||||
|
||||
mrk_ast_node *child = code->children.arr[0];
|
||||
TEST_CHECK(child->type == mrk_ast_node_type_text);
|
||||
TEST_CHECK(child->d.text.start == 1);
|
||||
TEST_MSG("start: %lu", child->d.text.start);
|
||||
TEST_CHECK(child->d.text.end == 20);
|
||||
TEST_MSG("end: %lu", child->d.text.end);
|
||||
}
|
||||
|
||||
TEST_LIST = {
|
||||
{ "parser header", test_parse_header },
|
||||
{ "parser link", test_parse_link },
|
||||
{ "parser unordered list", test_parse_unordered_list },
|
||||
{ "parser code", test_parse_code },
|
||||
{ NULL, NULL }
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue