feat(lexer): match check boxes
parent
ec076a56a5
commit
4ba3195ea0
|
@ -34,6 +34,8 @@ typedef enum mrk_token_type {
|
|||
mrk_token_type_backslash,
|
||||
mrk_token_type_dotted_number,
|
||||
mrk_token_type_word,
|
||||
mrk_token_type_checked_box,
|
||||
mrk_token_type_unchecked_box,
|
||||
} mrk_token_type;
|
||||
|
||||
typedef struct mrk_token {
|
||||
|
|
|
@ -33,10 +33,11 @@ char mrk_lexer_peek(mrk_lexer *lexer);
|
|||
char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n);
|
||||
|
||||
/**
|
||||
* Returns true if the nul-terminated string s is equal to the next characters
|
||||
* in the token stream.
|
||||
* Returns true if the nul-terminated string s matches the next characters that
|
||||
* would be consumed. This is a convenience function that avoids having to make
|
||||
* multiple peek calls.
|
||||
*/
|
||||
bool mrk_lexer_match(mrk_lexer *lexer, const char *s);
|
||||
bool mrk_lexer_peek_str(mrk_lexer *lexer, const char *s);
|
||||
|
||||
/**
|
||||
* Advance the current position by one character, adding the new character to
|
||||
|
|
|
@ -84,6 +84,24 @@ void mrk_lexer_advance_word(mrk_lexer *lexer) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Returns true if the nul-terminated string s matches the next characters that
 * would be consumed, starting at lexer->pos.buf_index. The lexer position is
 * not modified.
 *
 * The comparison stops at the first mismatching character, or as soon as the
 * end of the buffer is reached, so nothing past the end of the buffer is read
 * and every character of s must match (not just the last one). An empty s
 * always matches.
 */
bool mrk_lexer_peek_str(mrk_lexer *lexer, const char *s) {
  for (size_t i = 0; s[i] != '\0'; i++) {
    size_t index = lexer->pos.buf_index + i;

    // A non-zero buf.len bounds the buffer; this has to be checked before
    // buf.s[index] is read. As in the previous check, a buf.len of 0 is
    // treated as "no explicit bound" and only the nul check below applies.
    if (lexer->buf.len > 0 && index >= lexer->buf.len) {
      return false;
    }

    // s[i] is never nul inside the loop, so this comparison also rejects a
    // nul byte in the buffer (end of input before the whole string matched).
    if (lexer->buf.s[index] != s[i]) {
      return false;
    }
  }

  return true;
}
|
||||
|
||||
char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n) {
|
||||
// Check whether the lexer would be done in n steps
|
||||
bool done_in_n = false;
|
||||
|
@ -97,14 +115,6 @@ char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n) {
|
|||
return done_in_n ? '\0' : lexer->buf.s[lexer->pos.buf_index + n];
|
||||
}
|
||||
|
||||
/* bool mrk_lexer_match(mrk_lexer *lexer, const char *s) { */
|
||||
/* size_t s_len = strlen(s); */
|
||||
/* if (mrk_lexer_done(lexer) && s[0] != '\0') { */
|
||||
/* return false; */
|
||||
/* } */
|
||||
|
||||
/* } */
|
||||
|
||||
void mrk_lexer_reset(mrk_lexer *lexer) {
|
||||
lexer->token.start = lexer->pos.buf_index;
|
||||
lexer->token.end = lexer->pos.buf_index;
|
||||
|
@ -168,7 +178,17 @@ mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) {
|
|||
mrk_lexer_emit(out, lexer, mrk_token_type_bang);
|
||||
break;
|
||||
case '[':
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_left_bracket);
|
||||
// Checkboxes for lists are lexed separately to simplify the parser later
|
||||
// on
|
||||
if (mrk_lexer_peek_str(lexer, " ]")) {
|
||||
mrk_lexer_advance_n(lexer, 2);
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_unchecked_box);
|
||||
} else if (mrk_lexer_peek_str(lexer, "x]")) {
|
||||
mrk_lexer_advance_n(lexer, 2);
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_checked_box);
|
||||
} else {
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_left_bracket);
|
||||
}
|
||||
break;
|
||||
case ']':
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_right_bracket);
|
||||
|
@ -198,7 +218,7 @@ mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) {
|
|||
break;
|
||||
case ' ': {
|
||||
// Either a double space or a line break
|
||||
if (mrk_lexer_peek(lexer) == ' ' && mrk_lexer_peek_n(lexer, 1) == '\n') {
|
||||
if (mrk_lexer_peek_str(lexer, " \n")) {
|
||||
mrk_lexer_advance_n(lexer, 2);
|
||||
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_line_break);
|
||||
|
|
Loading…
Reference in New Issue