feat(lexer): matching more things
This commit is contained in:
parent
f003e3555b
commit
f6e034097d
4 changed files with 111 additions and 29 deletions
|
|
@ -26,6 +26,12 @@ struct mrk_lexer {
|
|||
*/
|
||||
char mrk_lexer_peek(mrk_lexer *lexer);
|
||||
|
||||
/**
|
||||
* Return the n'th next character that would be consumed. If `n` is zero, this
|
||||
* function is equivalent to calling peek.
|
||||
*/
|
||||
char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n);
|
||||
|
||||
/**
|
||||
* Returns true if the nul-terminated string s is equal to the next characters
|
||||
* in the token stream.
|
||||
|
|
@ -38,6 +44,11 @@ bool mrk_lexer_match(mrk_lexer *lexer, const char *s);
|
|||
*/
|
||||
char mrk_lexer_advance(mrk_lexer *lexer);
|
||||
|
||||
/**
|
||||
* Advance while the next character to peek is equal to `c`; stops at the first character that differs.
|
||||
*/
|
||||
void mrk_lexer_advance_eq(mrk_lexer *lexer, char c);
|
||||
|
||||
/**
|
||||
* Advance `n` positions; equivalent to running advance `n` times and returning
|
||||
* the last call's result.
|
||||
|
|
|
|||
|
|
@ -1,3 +1,6 @@
|
|||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "mrk/lexer.h"
|
||||
#include "mrk/lexer_internal.h"
|
||||
|
||||
|
|
@ -55,11 +58,18 @@ char mrk_lexer_advance(mrk_lexer *lexer) {
|
|||
/**
 * Advance the lexer several times in a row and return the result of the final
 * call to mrk_lexer_advance.
 *
 * The last advance is performed unconditionally, so passing `n == 0` still
 * advances exactly once (same as `n == 1`).
 */
char mrk_lexer_advance_n(mrk_lexer *lexer, size_t n) {
  // Every step except the last one discards its result
  for (size_t remaining = n; remaining > 1; remaining--) {
    mrk_lexer_advance(lexer);
  }

  // The final step provides the return value
  return mrk_lexer_advance(lexer);
}
|
||||
|
||||
/**
 * Consume characters for as long as the next character to peek equals `c`.
 *
 * On return, either the input is exhausted or the next character to peek
 * differs from `c`. Nothing is consumed if the first peeked character already
 * differs.
 */
void mrk_lexer_advance_eq(mrk_lexer *lexer, char c) {
  // mrk_lexer_peek reports '\0' once the lexer is done, so a caller passing
  // c == '\0' would keep matching at the end of input. Checking for done
  // first guarantees the loop stops there; for any other `c` the behaviour is
  // unchanged.
  while (!mrk_lexer_done(lexer) && mrk_lexer_peek(lexer) == c) {
    mrk_lexer_advance(lexer);
  }
}
|
||||
|
||||
char mrk_lexer_peek(mrk_lexer *lexer) {
|
||||
if (mrk_lexer_done(lexer)) {
|
||||
return '\0';
|
||||
|
|
@ -68,6 +78,27 @@ char mrk_lexer_peek(mrk_lexer *lexer) {
|
|||
return lexer->buf.s[lexer->pos.buf_index];
|
||||
}
|
||||
|
||||
/**
 * Return the character `n` positions ahead of the current position without
 * consuming anything. With `n == 0` this inspects the same character as
 * mrk_lexer_peek.
 *
 * Returns '\0' if the input ends at or before the requested position, i.e. if
 * any offset in the inclusive range 0..n either reaches the buffer length
 * (checked only when the length is non-zero) or holds a nul byte.
 */
char mrk_lexer_peek_n(mrk_lexer *lexer, size_t n) {
  const char *s = lexer->buf.s;
  const size_t start = lexer->pos.buf_index;
  // As in the rest of this function's original logic, the length bound is only
  // applied when a non-zero length is recorded; otherwise the nul byte is the
  // sole terminator.
  const bool bounded = lexer->buf.len > 0;

  // Walk every offset up to AND including n. The requested position itself
  // must be validated before it is read, otherwise peeking at the very end of
  // a length-delimited buffer reads one element past it.
  for (size_t i = 0;; i++) {
    if (bounded && start + i >= lexer->buf.len) {
      return '\0';
    }

    if (s[start + i] == '\0') {
      return '\0';
    }

    if (i == n) {
      return s[start + i];
    }
  }
}
|
||||
|
||||
/* bool mrk_lexer_match(mrk_lexer *lexer, const char *s) { */
|
||||
/* size_t s_len = strlen(s); */
|
||||
/* if (mrk_lexer_done(lexer) && s[0] != '\0') { */
|
||||
/* return false; */
|
||||
/* } */
|
||||
|
||||
/* } */
|
||||
|
||||
void mrk_lexer_reset(mrk_lexer *lexer) {
|
||||
lexer->token.start = lexer->pos.buf_index;
|
||||
lexer->token.end = lexer->pos.buf_index;
|
||||
|
|
@ -90,14 +121,29 @@ mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) {
|
|||
mrk_lexer_reset(lexer);
|
||||
|
||||
while (!lexer->token.emitted && !mrk_lexer_done(lexer)) {
|
||||
switch (mrk_lexer_advance(lexer)) {
|
||||
// Match one or more hashtags as a single header definition
|
||||
char c = mrk_lexer_advance(lexer);
|
||||
switch (c) {
|
||||
// All these characters have multiple meanings depending on their location
|
||||
// in the file and how many there are
|
||||
case '#':
|
||||
while (mrk_lexer_peek(lexer) == '#') {
|
||||
mrk_lexer_advance(lexer);
|
||||
}
|
||||
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_header);
|
||||
mrk_lexer_advance_eq(lexer, c);
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_pounds);
|
||||
break;
|
||||
case '`':
|
||||
mrk_lexer_advance_eq(lexer, c);
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_backticks);
|
||||
break;
|
||||
case '-':
|
||||
mrk_lexer_advance_eq(lexer, c);
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_dashes);
|
||||
break;
|
||||
case '_':
|
||||
mrk_lexer_advance_eq(lexer, c);
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_underscores);
|
||||
break;
|
||||
case '*':
|
||||
mrk_lexer_advance_eq(lexer, c);
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_stars);
|
||||
break;
|
||||
// Two consecutive newlines constitute a blank line, otherwise they're
|
||||
// ignored as whitespace
|
||||
|
|
@ -110,29 +156,22 @@ mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) {
|
|||
}
|
||||
break;
|
||||
case ' ': {
|
||||
/* if (mrk_lexer_peek(lexer) == ' ' && mrk_lexer_peek_n(lexer,)) */
|
||||
// Either a double space or a line break
|
||||
if (mrk_lexer_peek(lexer) == ' ') {
|
||||
mrk_lexer_advance(lexer);
|
||||
if (mrk_lexer_peek(lexer) == ' ' && mrk_lexer_peek_n(lexer, 1) == '\n') {
|
||||
mrk_lexer_advance_n(lexer, 2);
|
||||
|
||||
if (mrk_lexer_peek(lexer) == '\n') {
|
||||
mrk_lexer_advance(lexer);
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_line_break);
|
||||
} else {
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_space_space);
|
||||
}
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_line_break);
|
||||
} else {
|
||||
mrk_lexer_reset(lexer);
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_space);
|
||||
}
|
||||
} break;
|
||||
/* case '*': */
|
||||
/* if (mrk_lexer_peek(lexer) == '*') { */
|
||||
/* mrk_lexer_advance(lexer); */
|
||||
/* mrk_lexer_emit(out, lexer, mrk_token_type_star_star); */
|
||||
/* } else { */
|
||||
/* // TODO match word */
|
||||
/* } */
|
||||
/* default: */
|
||||
/* return mrk_lexer_err_unexpected_char; */
|
||||
case '\t':
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_tab);
|
||||
break;
|
||||
case '>':
|
||||
mrk_lexer_emit(out, lexer, mrk_token_type_right_angle_bracket);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue