feat: lex and parse single-line code blocks

main
Jef Roosens 2024-03-25 17:03:00 +01:00
parent 2387461e21
commit c653d437bd
Signed by: Jef Roosens
GPG Key ID: 02D4C0997E74717B
8 changed files with 128 additions and 10 deletions

View File

@ -15,6 +15,7 @@ typedef enum mrk_ast_node_type {
mrk_ast_node_type_paragraph,
mrk_ast_node_type_list,
mrk_ast_node_type_list_item,
mrk_ast_node_type_code,
} mrk_ast_node_type;
typedef struct mrk_ast_node {

View File

@ -13,8 +13,9 @@ typedef enum mrk_parser_err {
mrk_parser_err_ok = 0,
mrk_parser_err_unexpected_eat,
mrk_parser_err_unexpected_token,
mrk_parser_unclosed_brackets,
mrk_parser_unexpected_path,
mrk_parser_err_unclosed_brackets,
mrk_parser_err_unclosed_backticks,
mrk_parser_err_unexpected_path,
} mrk_parser_err;
/**

View File

@ -92,4 +92,9 @@ mrk_err mrk_parser_parse_list(mrk_ast_node *out, mrk_parser *parser);
mrk_err mrk_parser_parse_list_item(mrk_ast_node *out, mrk_parser *parser);
/**
 * Parse a single-line code segment.
 *
 * The parser is expected to be positioned on the opening backtick, which is
 * consumed first. Tokens up to the closing backtick are not interpreted as
 * Markdown: newlines are stored as space children of `out`, every other token
 * is folded into text children of `out`.
 *
 * @param out node to fill in; its type is set to mrk_ast_node_type_code
 * @param parser parser to read tokens from
 * @return mrk_err_ok once the closing backtick has been consumed;
 * mrk_err_invalid_md (with parser->error filled in) if a blank line, a change
 * of indent or the end of the input is reached before a closing backtick
 */
mrk_err mrk_parser_parse_code(mrk_ast_node *out, mrk_parser *parser);
#endif

View File

@ -286,7 +286,6 @@ void mrk_lexer_lex_start_of_line(mrk_token *out, mrk_lexer *lexer) {
mrk_lexer_advance_n(lexer, 2);
mrk_lexer_emit(out, lexer, mrk_token_type_triple_backtick);
} else {
mrk_lexer_advance(lexer);
mrk_lexer_emit(out, lexer, mrk_token_type_backtick);
}
break;
@ -373,7 +372,6 @@ void mrk_lexer_lex_middle_of_line(mrk_token *out, mrk_lexer *lexer) {
}
} break;
case '`':
mrk_lexer_advance(lexer);
mrk_lexer_emit(out, lexer, mrk_token_type_backtick);
break;
default:

View File

@ -32,12 +32,12 @@ const char *mrk_parser_err_msg(mrk_parser *parser) {
mrk_token_type_names[parser->error.token.type],
parser->error.token.type);
break;
case mrk_parser_unclosed_brackets:
case mrk_parser_err_unclosed_brackets:
sprintf(parser->error.buf, "%lu:%lu: unclosed bracket",
parser->error.token.start_line + 1,
parser->error.token.start_line_index + 1);
break;
case mrk_parser_unexpected_path:
case mrk_parser_err_unexpected_path:
return unexpected_path_msg;
}

View File

@ -1,3 +1,5 @@
#include "mrk/parser.h"
#include "mrk/lexer.h"
#include "mrk/parser_internal.h"
#include <stdio.h>
@ -93,7 +95,9 @@ mrk_err mrk_parser_parse_common(mrk_parser *parser, mrk_ast_node *parent) {
while (!mrk_parser_done(parser) && parser->indent == indent) {
mrk_ast_node *child;
switch (mrk_parser_peek(parser).type) {
mrk_token token = mrk_parser_peek(parser);
switch (token.type) {
case mrk_token_type_text:
MRK_RES(mrk_ast_node_child_append(&child, parent));
MRK_RES(mrk_parser_parse_text(child, parser));
@ -102,6 +106,11 @@ mrk_err mrk_parser_parse_common(mrk_parser *parser, mrk_ast_node *parent) {
MRK_RES(mrk_ast_node_child_append(&child, parent));
MRK_RES(mrk_parser_parse_link(child, parser));
break;
case mrk_token_type_backtick:
MRK_RES(mrk_ast_node_child_append(&child, parent));
MRK_RES(mrk_parser_parse_code(child, parser));
break;
case mrk_token_type_newline:
MRK_RES(mrk_ast_node_child_append(&child, parent));
child->type = mrk_ast_node_type_space;
@ -112,6 +121,16 @@ mrk_err mrk_parser_parse_common(mrk_parser *parser, mrk_ast_node *parent) {
mrk_parser_advance(parser);
mrk_parser_indent(parser);
return mrk_err_ok;
// All these tokens have no special meaning in this context
case mrk_token_type_left_paren:
case mrk_token_type_right_paren:
mrk_parser_advance(parser);
MRK_RES(mrk_ast_node_child_append(&child, parent));
child->type = mrk_ast_node_type_text;
child->d.text.start = token.start;
child->d.text.end = token.end;
break;
// Any other tokens aren't part of the common section so we just exit
default:
return mrk_err_ok;
@ -128,7 +147,7 @@ mrk_err mrk_parser_parse_link(mrk_ast_node *out, mrk_parser *parser) {
MRK_RES(mrk_parser_eat(&left_bracket, parser, mrk_token_type_left_bracket));
if (mrk_parser_done(parser)) {
parser->error.code = mrk_parser_unclosed_brackets;
parser->error.code = mrk_parser_err_unclosed_brackets;
parser->error.token = left_bracket;
return mrk_err_invalid_md;
@ -185,7 +204,7 @@ mrk_err mrk_parser_parse_list(mrk_ast_node *out, mrk_parser *parser) {
out->d.list.ordered = true;
break;
default:
parser->error.code = mrk_parser_unexpected_path;
parser->error.code = mrk_parser_err_unexpected_path;
return mrk_err_invalid_md;
}
@ -245,7 +264,7 @@ mrk_err mrk_parser_parse_list_item(mrk_ast_node *out, mrk_parser *parser) {
break;
// This path should never be taken
default:
parser->error.code = mrk_parser_unexpected_path;
parser->error.code = mrk_parser_err_unexpected_path;
return mrk_err_invalid_md;
}
@ -262,3 +281,59 @@ mrk_err mrk_parser_parse_list_item(mrk_ast_node *out, mrk_parser *parser) {
return mrk_err_ok;
}
mrk_err mrk_parser_parse_code(mrk_ast_node *out, mrk_parser *parser) {
out->type = mrk_ast_node_type_code;
mrk_token start_backtick;
MRK_RES(mrk_parser_eat(&start_backtick, parser, mrk_token_type_backtick));
size_t indent = parser->indent;
while (!mrk_parser_done(parser) && parser->indent == indent) {
mrk_ast_node *child;
mrk_token token = mrk_parser_peek(parser);
switch (token.type) {
case mrk_token_type_blank_line:
parser->error.code = mrk_parser_err_unclosed_brackets;
parser->error.token = start_backtick;
return mrk_err_invalid_md;
case mrk_token_type_backtick:
mrk_parser_advance(parser);
return mrk_err_ok;
case mrk_token_type_newline:
MRK_RES(mrk_ast_node_child_append(&child, out));
child->type = mrk_ast_node_type_space;
mrk_parser_advance(parser);
mrk_parser_indent(parser);
break;
default:
mrk_parser_advance(parser);
if (out->children.len == 0 ||
out->children.arr[out->children.len - 1]->type !=
mrk_ast_node_type_text) {
MRK_RES(mrk_ast_node_child_append(&child, out));
child->type = mrk_ast_node_type_text;
child->d.text.start = token.start;
child->d.text.end = token.end;
}
// Simply append current text to previous one
else {
child = out->children.arr[out->children.len - 1];
child->d.text.end = token.end;
}
break;
}
}
// Successful exit of the function should've already happened in the while
// loop
parser->error.code = mrk_parser_err_unclosed_brackets;
parser->error.token = start_backtick;
return mrk_err_invalid_md;
}

View File

@ -66,9 +66,25 @@ void test_lexer_simple1() {
TEST_CHECK(t.type == mrk_token_type_text);
}
/**
 * The first token lexed from a line starting with a backtick must be a single
 * backtick token covering exactly that one character.
 */
void test_lexer_code() {
  LEXER_INIT();

  mrk_token tok;
  const char *input = "`world [hello](link)`";

  mrk_lexer_open(lxr, input, 0);

  TEST_CHECK(mrk_lexer_next(&tok, lxr) == mrk_lexer_err_ok);
  TEST_CHECK(tok.type == mrk_token_type_backtick);
  TEST_CHECK(tok.start == 0);
  TEST_CHECK(tok.end == 1);
}
/*
 * Table of lexer test cases: each entry pairs a display name with the test
 * function to run. The { NULL, NULL } entry terminates the table.
 * Presumably consumed by the test harness pulled in through test.h.
 */
TEST_LIST = {
{ "lexer header", test_lexer_header },
{ "lexer line break", test_lexer_line_break},
{ "lexer simple 1", test_lexer_simple1 },
{ "lexer code", test_lexer_code },
{ NULL, NULL }
};

View File

@ -1,3 +1,4 @@
#include "mrk/ast.h"
#include "test.h"
#include "mrk/lexer.h"
@ -98,9 +99,30 @@ void test_parse_unordered_list() {
TEST_CHECK(subchild->children.arr[0]->d.text.end == 57);
}
/**
 * Everything between the backticks, link syntax included, must end up in one
 * single text child of the code node.
 */
void test_parse_code() {
  const char *buf = "`world [hello](link)`";
  PARSER_OPEN(buf);

  mrk_ast_node *code_node;
  mrk_ast_node_init(&code_node);

  TEST_CHECK(mrk_parser_parse_code(code_node, parser) == mrk_err_ok);
  TEST_CHECK(code_node->type == mrk_ast_node_type_code);
  TEST_CHECK(code_node->children.len == 1);

  // The text range excludes both the opening and the closing backtick
  mrk_ast_node *text_child = code_node->children.arr[0];
  TEST_CHECK(text_child->type == mrk_ast_node_type_text);

  TEST_CHECK(text_child->d.text.start == 1);
  TEST_MSG("start: %lu", text_child->d.text.start);

  TEST_CHECK(text_child->d.text.end == 20);
  TEST_MSG("end: %lu", text_child->d.text.end);
}
/*
 * Table of parser test cases: each entry pairs a display name with the test
 * function to run. The { NULL, NULL } entry terminates the table.
 * Presumably consumed by the test harness pulled in through test.h.
 */
TEST_LIST = {
{ "parser header", test_parse_header },
{ "parser link", test_parse_link },
{ "parser unordered list", test_parse_unordered_list },
{ "parser code", test_parse_code },
{ NULL, NULL }
};