feat(parser): start of parsing headers
parent
6dba1a8291
commit
a4a41b287e
|
@ -1,16 +1,18 @@
|
|||
#ifndef MRK_AST
|
||||
#define MRK_AST
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "mrk/common.h"
|
||||
|
||||
#define MRK_AST_NODE_ARGS 1
|
||||
#define MRK_AST_NODE_ARGS 4
|
||||
|
||||
/**
 * Kinds of node that can appear in the AST.
 */
typedef enum mrk_ast_node_type {
|
||||
mrk_ast_node_type_none = 0, /* explicitly the zero value */
|
||||
mrk_ast_node_type_header, /* assigned by mrk_parser_parse_header */
|
||||
mrk_ast_node_type_sentence,
|
||||
mrk_ast_node_type_text, /* presumably a run of text; confirm in the parser */
|
||||
mrk_ast_node_type_space, /* used for a newline inside a header */
|
||||
} mrk_ast_node_type;
|
||||
|
||||
typedef struct mrk_ast_node {
|
||||
|
@ -22,6 +24,7 @@ typedef struct mrk_ast_node {
|
|||
struct {
|
||||
void *ptr;
|
||||
size_t num;
|
||||
bool state;
|
||||
} args[MRK_AST_NODE_ARGS];
|
||||
} mrk_ast_node;
|
||||
|
||||
|
@ -30,6 +33,9 @@ typedef struct mrk_ast_node {
|
|||
*/
|
||||
mrk_err mrk_ast_node_init(mrk_ast_node **out);
|
||||
|
||||
mrk_err mrk_ast_node_child_append(mrk_ast_node *parent, mrk_ast_node *child);
|
||||
/**
|
||||
* Allocate a new empty child node and return a pointer to it.
|
||||
*/
|
||||
mrk_err mrk_ast_node_child_append(mrk_ast_node **out, mrk_ast_node *parent);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -25,6 +25,7 @@ typedef enum mrk_token_type {
|
|||
mrk_token_type_star,
|
||||
mrk_token_type_double_star,
|
||||
mrk_token_type_newline,
|
||||
mrk_token_type_blank_line,
|
||||
mrk_token_type_line_break,
|
||||
mrk_token_type_right_angle_brackets,
|
||||
mrk_token_type_left_bracket,
|
||||
|
|
|
@ -40,8 +40,25 @@ mrk_token mrk_parser_peek(mrk_parser *parser);
|
|||
*/
|
||||
void mrk_parser_advance(mrk_parser *parser);
|
||||
|
||||
mrk_err mrk_parser_parse_block(mrk_ast_node **out, mrk_parser *parser);
|
||||
/**
|
||||
* Advance the parser only if the next token has the type specified as the
|
||||
* argument. Otherwise, do not advance and return an error.
|
||||
*/
|
||||
mrk_err mrk_parser_eat(mrk_token *out, mrk_parser *parser, mrk_token_type type);
|
||||
|
||||
mrk_err mrk_parser_parse_header(mrk_ast_node **out, mrk_parser *parser);
|
||||
mrk_err mrk_parser_parse_block(mrk_ast_node *out, mrk_parser *parser);
|
||||
|
||||
mrk_err mrk_parser_parse_header(mrk_ast_node *out, mrk_parser *parser);
|
||||
|
||||
/**
|
||||
* Parse a text token, as well as any following text tokens, delimited by a
|
||||
* single newline which will be converted to a space.
|
||||
*/
|
||||
mrk_err mrk_parser_parse_text(mrk_ast_node *out, mrk_parser *parser);
|
||||
|
||||
/**
|
||||
* Parse a link construct
|
||||
*/
|
||||
mrk_err mrk_parser_parse_link(mrk_ast_node *out, mrk_parser *parser);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -6,7 +6,7 @@ mrk_err mrk_ast_node_init(mrk_ast_node **out) {
|
|||
return mrk_err_ok;
|
||||
}
|
||||
|
||||
mrk_err mrk_ast_node_child_append(mrk_ast_node *parent, mrk_ast_node *child) {
|
||||
mrk_err mrk_ast_node_child_append(mrk_ast_node **out, mrk_ast_node *parent) {
|
||||
mrk_ast_node **children =
|
||||
parent->children.len == 0
|
||||
? malloc(sizeof(mrk_ast_node *))
|
||||
|
@ -17,7 +17,9 @@ mrk_err mrk_ast_node_child_append(mrk_ast_node *parent, mrk_ast_node *child) {
|
|||
return mrk_err_failed_alloc;
|
||||
}
|
||||
|
||||
children[parent->children.len] = child;
|
||||
MRK_RES(mrk_ast_node_init(out));
|
||||
|
||||
children[parent->children.len] = *out;
|
||||
parent->children.arr = children;
|
||||
parent->children.len++;
|
||||
|
||||
|
|
|
@ -265,8 +265,12 @@ void mrk_lexer_lex_start_of_line(mrk_token *out, mrk_lexer *lexer) {
|
|||
}
|
||||
} break;
|
||||
case '\n':
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_newline);
|
||||
break;
|
||||
if (mrk_lexer_peek(lexer) == '\n') {
|
||||
mrk_lexer_advance(lexer);
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_blank_line);
|
||||
} else {
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_newline);
|
||||
}
|
||||
case '\t':
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_indent);
|
||||
break;
|
||||
|
@ -332,7 +336,12 @@ void mrk_lexer_lex_middle_of_line(mrk_token *out, mrk_lexer *lexer) {
|
|||
mrk_lexer_emit(out, lexer, mrk_token_type_backslash);
|
||||
break;
|
||||
case '\n':
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_newline);
|
||||
if (mrk_lexer_peek(lexer) == '\n') {
|
||||
mrk_lexer_advance(lexer);
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_blank_line);
|
||||
} else {
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_newline);
|
||||
}
|
||||
break;
|
||||
case '!':
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_bang);
|
||||
|
|
|
@ -15,10 +15,10 @@ mrk_err mrk_parser_parse(mrk_ast_node **out, mrk_parser *parser) {
|
|||
MRK_RES(mrk_ast_node_init(&root));
|
||||
|
||||
while (!mrk_lexer_done(parser->lexer)) {
|
||||
mrk_ast_node *block_root;
|
||||
mrk_ast_node *child;
|
||||
|
||||
MRK_RES(mrk_parser_parse_block(&block_root, parser));
|
||||
MRK_RES(mrk_ast_node_child_append(root, block_root));
|
||||
MRK_RES(mrk_ast_node_child_append(&child, root));
|
||||
MRK_RES(mrk_parser_parse_block(child, parser));
|
||||
}
|
||||
|
||||
*out = root;
|
||||
|
@ -26,40 +26,49 @@ mrk_err mrk_parser_parse(mrk_ast_node **out, mrk_parser *parser) {
|
|||
return mrk_err_ok;
|
||||
}
|
||||
|
||||
mrk_err mrk_parser_parse_block(mrk_ast_node **out, mrk_parser *parser) {
|
||||
mrk_err mrk_parser_parse_block(mrk_ast_node *out, mrk_parser *parser) {
|
||||
mrk_token t = mrk_parser_peek(parser);
|
||||
|
||||
mrk_err (*parse_fn)(mrk_ast_node **, mrk_parser *) = NULL;
|
||||
|
||||
/* switch (t.type) { */
|
||||
/* case mrk_token_type_pounds: { */
|
||||
/* parse_fn = mrk_parser_parse_header; */
|
||||
/* break; */
|
||||
/* } */
|
||||
/* } */
|
||||
|
||||
if (parse_fn == NULL) {
|
||||
MRK_PARSE_ERR(parser, t, "Unexpected token.");
|
||||
switch (t.type) {
|
||||
case mrk_token_type_header_start:
|
||||
MRK_RES(mrk_parser_parse_header(out, parser));
|
||||
break;
|
||||
}
|
||||
|
||||
return parse_fn(out, parser);
|
||||
if (!mrk_parser_done(parser)) {
|
||||
mrk_parser_eat(NULL, parser, mrk_token_type_blank_line);
|
||||
}
|
||||
|
||||
return mrk_err_ok;
|
||||
}
|
||||
|
||||
/* mrk_err mrk_parser_parse_ */
|
||||
|
||||
mrk_err mrk_parser_parse_header(mrk_ast_node **out, mrk_parser *parser) {
|
||||
mrk_token t = mrk_parser_peek(parser);
|
||||
mrk_err mrk_parser_parse_header(mrk_ast_node *out, mrk_parser *parser) {
|
||||
mrk_token header_token;
|
||||
mrk_parser_eat(&header_token, parser, mrk_token_type_header_start);
|
||||
|
||||
if (mrk_token_len(t) > MRK_MAX_HEADER_LEN) {
|
||||
MRK_PARSE_ERR(parser, t, "Headers can be at most 6 levels deep.");
|
||||
out->type = mrk_ast_node_type_header;
|
||||
out->args[0].num = mrk_token_len(header_token);
|
||||
|
||||
// Parse subsections of header
|
||||
while (!mrk_parser_done(parser) &&
|
||||
mrk_parser_peek(parser).type != mrk_token_type_blank_line) {
|
||||
mrk_ast_node *child;
|
||||
MRK_RES(mrk_ast_node_child_append(&child, out));
|
||||
|
||||
switch (mrk_parser_peek(parser).type) {
|
||||
case mrk_token_type_text:
|
||||
MRK_RES(mrk_parser_parse_text(child, parser));
|
||||
break;
|
||||
// Newlines are interpreted as spaces
|
||||
case mrk_token_type_newline:
|
||||
child->type = mrk_ast_node_type_space;
|
||||
mrk_parser_advance(parser);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
mrk_parser_advance(parser);
|
||||
|
||||
mrk_ast_node *header;
|
||||
MRK_RES(mrk_ast_node_init(&header));
|
||||
header->args[0].num = mrk_token_len(t);
|
||||
|
||||
// Headers are blocks of their own, so they're delimited by blank lines
|
||||
/* while (!mrk_parser_done(parser) && */
|
||||
/* (t = mrk_parser_peek(parser)).type != mrk_token_type_blank_line) {
|
||||
|
@ -71,6 +80,17 @@ mrk_err mrk_parser_parse_header(mrk_ast_node **out, mrk_parser *parser) {
|
|||
// Skip blank line
|
||||
mrk_parser_advance(parser);
|
||||
|
||||
*out = header;
|
||||
return mrk_err_ok;
|
||||
}
|
||||
|
||||
/**
 * Parse a single text token into `out`.
 *
 * Consumes one token of type mrk_token_type_text, tags `out` as a text node
 * and records the token's span in the node's arguments:
 *   - args[0].num: the token's `start` value
 *   - args[1].num: the token's `end` value
 *
 * NOTE(review): only one text token is consumed here; merging following text
 * tokens separated by a single newline is not implemented in this function.
 *
 * Returns mrk_err_ok on success. A failure from mrk_parser_eat is passed
 * through MRK_RES (defined elsewhere; presumably returns early on error).
 */
mrk_err mrk_parser_parse_text(mrk_ast_node *out, mrk_parser *parser) {
  mrk_token text_token;
  MRK_RES(mrk_parser_eat(&text_token, parser, mrk_token_type_text));

  // Tag the node here, the same way the header parser tags its own node, so
  // callers do not end up with an untyped child.
  out->type = mrk_ast_node_type_text;

  // Start in input buffer
  out->args[0].num = text_token.start;
  // End in input buffer
  out->args[1].num = text_token.end;

  return mrk_err_ok;
}
|
||||
|
|
|
@ -32,3 +32,21 @@ void mrk_parser_advance(mrk_parser *parser) {
|
|||
parser->lookahead.index =
|
||||
(parser->lookahead.index + 1) % MRK_PARSER_LOOKAHEAD_BUF_SIZE;
|
||||
}
|
||||
|
||||
mrk_err mrk_parser_eat(mrk_token *out, mrk_parser *parser,
|
||||
mrk_token_type type) {
|
||||
if (mrk_parser_done(parser) || mrk_parser_peek(parser).type != type) {
|
||||
parser->error.msg = "Unexpected token";
|
||||
parser->error.token = mrk_parser_peek(parser);
|
||||
|
||||
return mrk_err_unexpected_token;
|
||||
}
|
||||
|
||||
if (out != NULL) {
|
||||
*out = mrk_parser_peek(parser);
|
||||
}
|
||||
|
||||
mrk_parser_advance(parser);
|
||||
|
||||
return mrk_err_ok;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue