From 4ba3195ea0dd6b25c9afe61b1d43de9430ab978a Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Thu, 7 Mar 2024 13:56:53 +0100 Subject: [PATCH] feat(lexer): match check boxes --- include/mrk/lexer.h | 2 ++ src/_include/mrk/lexer_internal.h | 7 +++--- src/lexer/lexer.c | 40 +++++++++++++++++++++++-------- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/include/mrk/lexer.h b/include/mrk/lexer.h index f824749..9d44088 100644 --- a/include/mrk/lexer.h +++ b/include/mrk/lexer.h @@ -34,6 +34,8 @@ typedef enum mrk_token_type { mrk_token_type_backslash, mrk_token_type_dotted_number, mrk_token_type_word, + mrk_token_type_checked_box, + mrk_token_type_unchecked_box, } mrk_token_type; typedef struct mrk_token { diff --git a/src/_include/mrk/lexer_internal.h b/src/_include/mrk/lexer_internal.h index dc5f85e..0eca5f6 100644 --- a/src/_include/mrk/lexer_internal.h +++ b/src/_include/mrk/lexer_internal.h @@ -33,10 +33,11 @@ char mrk_lexer_peek(mrk_lexer *lexer); char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n); /** - * Returns true if the nul-terminated string s is equal to the next characters - * in the token stream. + * Returns true if the nul-terminated string s matches the next characters that + * would be consumed. This is a convenience method instead of having to call + * multiple peek calls. 
*/
-bool mrk_lexer_match(mrk_lexer *lexer, const char *s);
+bool mrk_lexer_peek_str(mrk_lexer *lexer, const char *s);
 
 /**
  * Advance the current position by one character, adding the new character to
diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
index c4a4fab..eed61ff 100644
--- a/src/lexer/lexer.c
+++ b/src/lexer/lexer.c
@@ -84,6 +84,24 @@ void mrk_lexer_advance_word(mrk_lexer *lexer) {
   }
 }
 
+// Returns true only if all of s matches the input that would be consumed next.
+bool mrk_lexer_peek_str(mrk_lexer *lexer, const char *s) {
+  for (size_t i = 0; s[i] != '\0'; i++) {
+    size_t index = lexer->pos.buf_index + i;
+    // Running out of input before s is exhausted can never be a match. When
+    // buf.len is set, it is checked first so buf.s is not read past that length.
+    bool at_end = (lexer->buf.len > 0 && index >= lexer->buf.len) ||
+                  lexer->buf.s[index] == '\0';
+    // Bail out on the first mismatch; otherwise a later matching character
+    // would overwrite an earlier failure (e.g. "x]" matching " ]").
+    if (at_end || lexer->buf.s[index] != s[i]) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
 char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n) {
   // Check whether the lexer would be done in n steps
   bool done_in_n = false;
@@ -97,14 +115,6 @@ char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n) {
   return done_in_n ?
'\0' : lexer->buf.s[lexer->pos.buf_index + n]; } -/* bool mrk_lexer_match(mrk_lexer *lexer, const char *s) { */ -/* size_t s_len = strlen(s); */ -/* if (mrk_lexer_done(lexer) && s[0] != '\0') { */ -/* return false; */ -/* } */ - -/* } */ - void mrk_lexer_reset(mrk_lexer *lexer) { lexer->token.start = lexer->pos.buf_index; lexer->token.end = lexer->pos.buf_index; @@ -168,7 +178,17 @@ mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) { mrk_lexer_emit(out, lexer, mrk_token_type_bang); break; case '[': - mrk_lexer_emit(out, lexer, mrk_token_type_left_bracket); + // Checkboxes for lists are lexed separately to simplify the parser later + // on + if (mrk_lexer_peek_str(lexer, " ]")) { + mrk_lexer_advance_n(lexer, 2); + mrk_lexer_emit(out, lexer, mrk_token_type_unchecked_box); + } else if (mrk_lexer_peek_str(lexer, "x]")) { + mrk_lexer_advance_n(lexer, 2); + mrk_lexer_emit(out, lexer, mrk_token_type_checked_box); + } else { + mrk_lexer_emit(out, lexer, mrk_token_type_left_bracket); + } break; case ']': mrk_lexer_emit(out, lexer, mrk_token_type_right_bracket); @@ -198,7 +218,7 @@ mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) { break; case ' ': { // Either a double space or a line break - if (mrk_lexer_peek(lexer) == ' ' && mrk_lexer_peek_n(lexer, 1) == '\n') { + if (mrk_lexer_peek_str(lexer, " \n")) { mrk_lexer_advance_n(lexer, 2); mrk_lexer_emit(out, lexer, mrk_token_type_line_break);