feat(lexer): match check boxes
parent
ec076a56a5
commit
4ba3195ea0
|
@ -34,6 +34,8 @@ typedef enum mrk_token_type {
|
||||||
mrk_token_type_backslash,
|
mrk_token_type_backslash,
|
||||||
mrk_token_type_dotted_number,
|
mrk_token_type_dotted_number,
|
||||||
mrk_token_type_word,
|
mrk_token_type_word,
|
||||||
|
mrk_token_type_checked_box,
|
||||||
|
mrk_token_type_unchecked_box,
|
||||||
} mrk_token_type;
|
} mrk_token_type;
|
||||||
|
|
||||||
typedef struct mrk_token {
|
typedef struct mrk_token {
|
||||||
|
|
|
@ -33,10 +33,11 @@ char mrk_lexer_peek(mrk_lexer *lexer);
|
||||||
char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n);
|
char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if the nul-terminated string s is equal to the next characters
|
* Returns true if the nul-terminated string s matches the next characters that
|
||||||
* in the token stream.
|
* would be consumed. This is a convenience method instead of having to call
|
||||||
|
* multiple peek calls.
|
||||||
*/
|
*/
|
||||||
bool mrk_lexer_match(mrk_lexer *lexer, const char *s);
|
bool mrk_lexer_peek_str(mrk_lexer *lexer, const char *s);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Advance the current position by one character, adding the new character to
|
* Advance the current position by one character, adding the new character to
|
||||||
|
|
|
@ -84,6 +84,24 @@ void mrk_lexer_advance_word(mrk_lexer *lexer) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns true if the nul-terminated string s matches the characters starting
 * at the lexer's current buffer index, without advancing the lexer.
 *
 * Every character of s must match; the function returns false as soon as a
 * mismatch is found or the input ends before s has been fully compared. An
 * empty s always matches.
 */
bool mrk_lexer_peek_str(mrk_lexer *lexer, const char *s) {
  for (size_t i = 0; s[i] != '\0'; i++) {
    size_t index = lexer->pos.buf_index + i;

    // The input ends either at buf.len (when a length is known) or at a nul
    // terminator. The length check comes first so that buf.s is never read at
    // or beyond buf.len.
    bool at_end = (lexer->buf.len > 0 && index >= lexer->buf.len) ||
                  (lexer->buf.s[index] == '\0');

    // Bail out on the first failure: a later matching character must not be
    // able to turn an earlier mismatch back into a match, and we must not keep
    // indexing past the end of the input.
    if (at_end || lexer->buf.s[index] != s[i]) {
      return false;
    }
  }

  return true;
}
|
||||||
|
|
||||||
char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n) {
|
char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n) {
|
||||||
// Check whether the lexer would be done in n steps
|
// Check whether the lexer would be done in n steps
|
||||||
bool done_in_n = false;
|
bool done_in_n = false;
|
||||||
|
@ -97,14 +115,6 @@ char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n) {
|
||||||
return done_in_n ? '\0' : lexer->buf.s[lexer->pos.buf_index + n];
|
return done_in_n ? '\0' : lexer->buf.s[lexer->pos.buf_index + n];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* bool mrk_lexer_match(mrk_lexer *lexer, const char *s) { */
|
|
||||||
/* size_t s_len = strlen(s); */
|
|
||||||
/* if (mrk_lexer_done(lexer) && s[0] != '\0') { */
|
|
||||||
/* return false; */
|
|
||||||
/* } */
|
|
||||||
|
|
||||||
/* } */
|
|
||||||
|
|
||||||
void mrk_lexer_reset(mrk_lexer *lexer) {
|
void mrk_lexer_reset(mrk_lexer *lexer) {
|
||||||
lexer->token.start = lexer->pos.buf_index;
|
lexer->token.start = lexer->pos.buf_index;
|
||||||
lexer->token.end = lexer->pos.buf_index;
|
lexer->token.end = lexer->pos.buf_index;
|
||||||
|
@ -168,7 +178,17 @@ mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) {
|
||||||
mrk_lexer_emit(out, lexer, mrk_token_type_bang);
|
mrk_lexer_emit(out, lexer, mrk_token_type_bang);
|
||||||
break;
|
break;
|
||||||
case '[':
|
case '[':
|
||||||
|
// Checkboxes for lists are lexed separately to simplify the parser later
|
||||||
|
// on
|
||||||
|
if (mrk_lexer_peek_str(lexer, " ]")) {
|
||||||
|
mrk_lexer_advance_n(lexer, 2);
|
||||||
|
mrk_lexer_emit(out, lexer, mrk_token_type_unchecked_box);
|
||||||
|
} else if (mrk_lexer_peek_str(lexer, "x]")) {
|
||||||
|
mrk_lexer_advance_n(lexer, 2);
|
||||||
|
mrk_lexer_emit(out, lexer, mrk_token_type_checked_box);
|
||||||
|
} else {
|
||||||
mrk_lexer_emit(out, lexer, mrk_token_type_left_bracket);
|
mrk_lexer_emit(out, lexer, mrk_token_type_left_bracket);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case ']':
|
case ']':
|
||||||
mrk_lexer_emit(out, lexer, mrk_token_type_right_bracket);
|
mrk_lexer_emit(out, lexer, mrk_token_type_right_bracket);
|
||||||
|
@ -198,7 +218,7 @@ mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) {
|
||||||
break;
|
break;
|
||||||
case ' ': {
|
case ' ': {
|
||||||
// Either a double space or a line break
|
// Either a double space or a line break
|
||||||
if (mrk_lexer_peek(lexer) == ' ' && mrk_lexer_peek_n(lexer, 1) == '\n') {
|
if (mrk_lexer_peek_str(lexer, " \n")) {
|
||||||
mrk_lexer_advance_n(lexer, 2);
|
mrk_lexer_advance_n(lexer, 2);
|
||||||
|
|
||||||
mrk_lexer_emit(out, lexer, mrk_token_type_line_break);
|
mrk_lexer_emit(out, lexer, mrk_token_type_line_break);
|
||||||
|
|
Loading…
Reference in New Issue