feat(lexer): match check boxes

main
Jef Roosens 2024-03-07 13:56:53 +01:00
parent ec076a56a5
commit 4ba3195ea0
Signed by: Jef Roosens
GPG Key ID: B75D4F293C7052DB
3 changed files with 36 additions and 13 deletions

View File

@ -34,6 +34,8 @@ typedef enum mrk_token_type {
mrk_token_type_backslash,
mrk_token_type_dotted_number,
mrk_token_type_word,
mrk_token_type_checked_box,
mrk_token_type_unchecked_box,
} mrk_token_type;
typedef struct mrk_token {

View File

@ -33,10 +33,11 @@ char mrk_lexer_peek(mrk_lexer *lexer);
char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n);
/**
* Returns true if the nul-terminated string s is equal to the next characters
* in the token stream.
* Returns true if the nul-terminated string s matches the next characters that
* would be consumed. This is a convenience method instead of having to call
* multiple peek calls.
*/
bool mrk_lexer_match(mrk_lexer *lexer, const char *s);
bool mrk_lexer_peek_str(mrk_lexer *lexer, const char *s);
/**
* Advance the current position by one character, adding the new character to

View File

@ -84,6 +84,24 @@ void mrk_lexer_advance_word(mrk_lexer *lexer) {
}
}
bool mrk_lexer_peek_str(mrk_lexer *lexer, const char *s) {
bool match = true;
size_t i = 0;
while (*s != '\0') {
// Check whether the lexer would be done before matching the entire string
bool done_in_n =
(lexer->buf.len > 0 && lexer->pos.buf_index + i == lexer->buf.len) ||
(lexer->buf.s[lexer->pos.buf_index + i] == '\0');
match = !done_in_n && (lexer->buf.s[lexer->pos.buf_index + i] == *s);
i++;
s++;
}
return match;
}
char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n) {
// Check whether the lexer would be done in n steps
bool done_in_n = false;
@ -97,14 +115,6 @@ char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n) {
return done_in_n ? '\0' : lexer->buf.s[lexer->pos.buf_index + n];
}
/* bool mrk_lexer_match(mrk_lexer *lexer, const char *s) { */
/* size_t s_len = strlen(s); */
/* if (mrk_lexer_done(lexer) && s[0] != '\0') { */
/* return false; */
/* } */
/* } */
void mrk_lexer_reset(mrk_lexer *lexer) {
lexer->token.start = lexer->pos.buf_index;
lexer->token.end = lexer->pos.buf_index;
@ -168,7 +178,17 @@ mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) {
mrk_lexer_emit(out, lexer, mrk_token_type_bang);
break;
case '[':
mrk_lexer_emit(out, lexer, mrk_token_type_left_bracket);
// Checkboxes for lists are lexed separately to simplify the parser later
// on
if (mrk_lexer_peek_str(lexer, " ]")) {
mrk_lexer_advance_n(lexer, 2);
mrk_lexer_emit(out, lexer, mrk_token_type_unchecked_box);
} else if (mrk_lexer_peek_str(lexer, "x]")) {
mrk_lexer_advance_n(lexer, 2);
mrk_lexer_emit(out, lexer, mrk_token_type_checked_box);
} else {
mrk_lexer_emit(out, lexer, mrk_token_type_left_bracket);
}
break;
case ']':
mrk_lexer_emit(out, lexer, mrk_token_type_right_bracket);
@ -198,7 +218,7 @@ mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) {
break;
case ' ': {
// Either a double space or a line break
if (mrk_lexer_peek(lexer) == ' ' && mrk_lexer_peek_n(lexer, 1) == '\n') {
if (mrk_lexer_peek_str(lexer, " \n")) {
mrk_lexer_advance_n(lexer, 2);
mrk_lexer_emit(out, lexer, mrk_token_type_line_break);