From dcc52e28508e6a7081e0874d738a99077d27c3b7 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Tue, 5 Mar 2024 10:07:57 +0100 Subject: [PATCH] feat(lexer): add more options --- config.mk | 2 +- include/mrk/lexer.h | 13 ++++++++++--- src/lexer/lexer.c | 42 +++++++++++++++++++++++++++++++++--------- 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/config.mk b/config.mk index 0c88a69..426bb55 100644 --- a/config.mk +++ b/config.mk @@ -14,4 +14,4 @@ INC_DIRS = $(PUB_INC_DIR) src/_include # object file is also recompiled if only a header is changed. # -MP: generate a dummy target for every header file (according to the docs it # prevents some errors when removing header files) -CFLAGS ?= -MMD -MP -g +CFLAGS ?= -MMD -MP -g -Wall -Wextra diff --git a/include/mrk/lexer.h b/include/mrk/lexer.h index d8782fa..01feae7 100644 --- a/include/mrk/lexer.h +++ b/include/mrk/lexer.h @@ -21,10 +21,17 @@ typedef enum mrk_token_type { mrk_token_type_stars, mrk_token_type_equals, mrk_token_type_blank_line, - mrk_token_type_space, + mrk_token_type_newline, + mrk_token_type_spaces, mrk_token_type_line_break, - mrk_token_type_right_angle_bracket, - mrk_token_type_tab, + mrk_token_type_right_angle_brackets, + mrk_token_type_tabs, + mrk_token_type_left_bracket, + mrk_token_type_right_bracket, + mrk_token_type_bang, + mrk_token_type_left_paren, + mrk_token_type_right_paren, + mrk_token_type_backslash, } mrk_token_type; typedef struct mrk_token { diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 3dd2463..089c576 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -150,6 +150,36 @@ mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) { mrk_lexer_advance_eq(lexer, c); mrk_lexer_emit(out, lexer, mrk_token_type_equals); break; + case '\t': + mrk_lexer_advance_eq(lexer, c); + mrk_lexer_emit(out, lexer, mrk_token_type_tabs); + break; + case '>': + mrk_lexer_advance_eq(lexer, c); + mrk_lexer_emit(out, lexer, mrk_token_type_right_angle_brackets); + break; + case '!': + mrk_lexer_emit(out, lexer, mrk_token_type_bang); + break; + case '[': + mrk_lexer_emit(out, lexer, mrk_token_type_left_bracket); + break; + case ']': + mrk_lexer_emit(out, lexer, mrk_token_type_right_bracket); + break; + case '(': + mrk_lexer_emit(out, lexer, mrk_token_type_left_paren); + break; + case ')': + mrk_lexer_emit(out, lexer, mrk_token_type_right_paren); + break; + case '\\': + if (mrk_lexer_peek(lexer) == '\n') { + mrk_lexer_emit(out, lexer, mrk_token_type_line_break); + } else { + mrk_lexer_emit(out, lexer, mrk_token_type_backslash); + } + break; // Two consecutive newlines constitute a blank line, otherwise they're // ignored as whitespace case '\n': @@ -157,26 +187,20 @@ mrk_lexer_err mrk_lexer_next(mrk_token *out, mrk_lexer *lexer) { mrk_lexer_advance(lexer); mrk_lexer_emit(out, lexer, mrk_token_type_blank_line); } else { - mrk_lexer_reset(lexer); + mrk_lexer_emit(out, lexer, mrk_token_type_newline); } break; case ' ': { - /* if (mrk_lexer_peek(lexer) == ' ' && mrk_lexer_peek_n(lexer,)) */ // Either a double space or a line break if (mrk_lexer_peek(lexer) == ' ' && mrk_lexer_peek_n(lexer, 1) == '\n') { mrk_lexer_advance_n(lexer, 2); mrk_lexer_emit(out, lexer, mrk_token_type_line_break); } else { - mrk_lexer_emit(out, lexer, mrk_token_type_space); + mrk_lexer_advance_eq(lexer, ' '); + mrk_lexer_emit(out, lexer, mrk_token_type_spaces); } } break; - case '\t': - mrk_lexer_emit(out, lexer, mrk_token_type_tab); - break; - case '>': - mrk_lexer_emit(out, lexer, mrk_token_type_right_angle_bracket); - break; } }