From 1bb0e7f119ba7b24cb770c89dbe02e89e5407939 Mon Sep 17 00:00:00 2001 From: Jef Roosens Date: Fri, 15 Dec 2023 10:01:46 +0100 Subject: [PATCH] feat(ltm): some more work on the parser --- ltm/include/ltm/common.h | 4 +- ltm/include/ltm/template.h | 3 +- ltm/src/_include/ltm/template_internal.h | 8 +- ltm/src/ltm_template_compile.c | 145 ++++++++++++++++------- 4 files changed, 112 insertions(+), 48 deletions(-) diff --git a/ltm/include/ltm/common.h b/ltm/include/ltm/common.h index d3fd9f9..ef86779 100644 --- a/ltm/include/ltm/common.h +++ b/ltm/include/ltm/common.h @@ -19,8 +19,8 @@ typedef enum ltm_err { ltm_err_ok = 0, - ltm_invalid_template, - ltm_failed_alloc, + ltm_err_invalid_template, + ltm_err_failed_alloc, } ltm_err; #endif diff --git a/ltm/include/ltm/template.h b/ltm/include/ltm/template.h index ceb7396..7a920fa 100644 --- a/ltm/include/ltm/template.h +++ b/ltm/include/ltm/template.h @@ -25,6 +25,7 @@ ltm_err ltm_template_compile(ltm_template **out, const char *template); * @param template char buffer containing the template * @param len length of the char buffer */ -ltm_err ltm_template_compile_n(ltm_template **out, const char *template, size_t len); +ltm_err ltm_template_compile_n(ltm_template **out, const char *template, + size_t len); #endif diff --git a/ltm/src/_include/ltm/template_internal.h b/ltm/src/_include/ltm/template_internal.h index d20c606..29ae7c7 100644 --- a/ltm/src/_include/ltm/template_internal.h +++ b/ltm/src/_include/ltm/template_internal.h @@ -4,7 +4,8 @@ #include "ltm/template.h" typedef enum ltm_placeholder_type { - ltm_placeholder_type_var = 0, + ltm_placeholder_type_invalid = 0, + ltm_placeholder_type_var, ltm_placeholder_type_loop_start, ltm_placeholder_type_loop_end, } ltm_placeholder_type; @@ -15,7 +16,10 @@ typedef enum ltm_placeholder_type { typedef struct ltm_placeholder { const char *start; const char *end; - const char *name; + struct { + const char *s; + size_t len; + } name; ltm_placeholder_type type; } ltm_placeholder; diff --git a/ltm/src/ltm_template_compile.c b/ltm/src/ltm_template_compile.c index fe62181..85145e3 100644 --- a/ltm/src/ltm_template_compile.c +++ b/ltm/src/ltm_template_compile.c @@ -1,3 +1,4 @@ +#include #include #include "ltm/common.h" @@ -8,7 +9,7 @@ ltm_err ltm_template_init(ltm_template **out) { ltm_template *template = calloc(1, sizeof(ltm_template)); if (template == NULL) { - return ltm_failed_alloc; + return ltm_err_failed_alloc; } *out = template; @@ -16,7 +17,8 @@ ltm_err ltm_template_init(ltm_template **out) { return ltm_err_ok; } -ltm_err ltm_template_next_placeholder(ltm_placeholder *ph, const char *s, size_t len) { +bool ltm_template_next_placeholder(ltm_placeholder *ph, const char *s, + size_t len) { ph->start = memchr(s, '{', len - 1); if ((ph->start != NULL) && (ph->start[1] == '{')) { @@ -26,62 +28,119 @@ ltm_err ltm_template_next_placeholder(ltm_placeholder *ph, const char *s, size_t // Non-terminated placeholders aren't valid if ((ph->end == NULL) || (ph->end[1] != '}')) { - return ltm_invalid_template; + ph->type = ltm_placeholder_type_invalid; + + return true; } - } else { - ph->end = NULL; + + // End should point to final character + ph->end++; + + // Parse the words + ph->name.s = ph->start + 2; + + while ((*ph->name.s == ' ') && (ph->name.s != ph->end - 1)) { + ph->name.s++; + } + + // Placeholder is empty + if (ph->name.s == ph->end - 1) { + ph->type = ltm_placeholder_type_invalid; + + return true; + } + + const char *ident = ph->name.s; + + while ((*ident != ' ') && (ident != ph->end - 1)) { + ident++; + } + + ph->name.len = ident - ph->name.s; + + while ((*ident == ' ') && (ident != ph->end - 1)) { + ident++; + } + + if (ident == ph->end - 1) { + ph->type = ltm_placeholder_type_var; + } else { + // Further parse the identifier + const char *temp = ident; + + while ((*temp != ' ') && (temp != ph->end - 1)) { + temp++; + } + + size_t ident_len = temp - ident; + + if (strncmp("loop", ident, ident_len) == 0) { + ph->type = ltm_placeholder_type_loop_start; + } else if (strncmp("end", ident, ident_len) == 0) { + ph->type = ltm_placeholder_type_loop_end; + } else { + ph->type = ltm_placeholder_type_invalid; + } + } + + return true; } - return ltm_err_ok; + return false; } ltm_err ltm_template_compile_n(ltm_template **out, const char *s, size_t len) { ltm_template *template; LTM_RES(ltm_template_init(&template)); - const char *start, *end; + ltm_placeholder ph; + bool in_loop = false; + const char *loop_start = NULL; + size_t loop_depth = 0; + size_t cur_loop_depth = 0; - while (((start = memchr(s, '{', len - 1)) != NULL) && (start[1] == '{')) { - size_t new_len = len - (start - s); + // TODO to ensure the loops are balanced, we should count how many loop starts + // we have seen and only match a loop end if the number matches; this way, we + // can allow arbitrarily nested loops - // Non-terminated placeholders aren't valid - if (((end = memchr(start + 2, '}', new_len - 3)) == NULL) || (end[1] != '}')) { - return ltm_invalid_template; - } - - // TODO actually parse placeholders - const char *word = start + 2; - - while ((*word == ' ') && (word != end)) { - word++; - } - - // The placeholder is empty - if (word == end) { - return ltm_invalid_template; - } - - const char *word2 = word; - - while ((*word2 != ' ') && (word2 != end)) { - word2++; - } - - size_t word_len = word2 - word; - - if (word2 != end) { - while ((*word2 == ' ') && (word2 != end)) { - word2++; + while (ltm_template_next_placeholder(&ph, s, len)) { + switch (ph.type) { + case ltm_placeholder_type_invalid: + return ltm_err_invalid_template; + case ltm_placeholder_type_var: + // TODO add var block + break; + case ltm_placeholder_type_loop_start: + if (!in_loop) { + loop_start = ph.end + 1; + in_loop = true; + loop_depth = cur_loop_depth; } + cur_loop_depth++; + break; + case ltm_placeholder_type_loop_end: + cur_loop_depth--; + + if (in_loop && (cur_loop_depth == loop_depth)) { + size_t loop_len = ph.end - loop_start; + // TODO recursive call to compile + + in_loop = false; + } + // We encountered a loop end without a start + else { + return ltm_err_invalid_template; + } + break; } - // Only one word, so it's a variable - if (word2 == end) { - // TODO register variable block - } + len -= ph.end + 1 - ph.start; + s = ph.end + 1; + } - len = len - (end + 2 - s); - s = end + 2; + // Unfinished loop + if (in_loop) { + return ltm_err_invalid_template; } return ltm_err_ok;