diff --git a/CHANGELOG.md b/CHANGELOG.md index 6843b8e..11ae703 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * `-c` flag to use custom config file (useful for testing) * LSM * Binary tree iterators + * Trie iterators ## [0.2.0](https://git.rustybever.be/Chewing_Bever/lander/src/tag/0.2.0) diff --git a/lsm/include/lsm.h b/lsm/include/lsm.h index 0a1639f..8a86f40 100644 --- a/lsm/include/lsm.h +++ b/lsm/include/lsm.h @@ -27,6 +27,7 @@ typedef enum lsm_error { lsm_error_null_value = 4, lsm_error_failed_io = 5, lsm_error_lock_busy = 6, + lsm_error_done = 7, } lsm_error; /*typedef struct lsm_string { */ diff --git a/lsm/include/lsm/bt.h b/lsm/include/lsm/bt.h index 67655d4..6d79f4f 100644 --- a/lsm/include/lsm/bt.h +++ b/lsm/include/lsm/bt.h @@ -102,6 +102,6 @@ void lsm_bt_iter(lsm_bt_iterator *out, const lsm_bt *bt); * @return true if a new entry was returned, false if the iterator has no more * entries to return */ -bool lsm_bt_iter_next(const void **out, char *key_out, lsm_bt_iterator *iter); +bool lsm_bt_iter_next(void **out, char *key_out, lsm_bt_iterator *iter); #endif diff --git a/lsm/include/lsm/trie.h b/lsm/include/lsm/trie.h index 58db0b6..17a9df5 100644 --- a/lsm/include/lsm/trie.h +++ b/lsm/include/lsm/trie.h @@ -1,7 +1,10 @@ #ifndef LSM_TRIE #define LSM_TRIE +#include + #include "lsm.h" +#include "lsm/bt.h" #include "lsm/str.h" /** @@ -9,6 +12,11 @@ */ typedef struct lsm_trie lsm_trie; +/** + * A node inside an `lsm_trie` trie + */ +typedef struct lsm_trie_node lsm_trie_node; + /** * Initialize a new trie. 
* @@ -57,4 +65,36 @@ lsm_error lsm_trie_remove(void **out, lsm_trie *trie, const lsm_str *key); */ uint64_t lsm_trie_size(const lsm_trie *trie); +/** + * Represents an in-flight iterator over a trie + */ +typedef struct lsm_trie_iterator lsm_trie_iterator; + +/** + * Initialize an iterator to iterate over all entries with keys starting + * with the given prefix. + * + * @param out pointer to store iterator pointer in + * @param trie trie to iterate + * @param prefix prefix of the keys; a zero-length string means iterating over + * the entire trie; NULL is interpreted as a zero-length string + */ +lsm_error lsm_trie_iter(lsm_trie_iterator **out, const lsm_trie *trie, + const lsm_str *prefix); + +/** + * Advance the given iterator. + * + * @param out pointer to store next data pointer in + * @param iter iterator to advance + * @return `lsm_error_ok` if next element has been returned, `lsm_error_done` if + * no more elements are present, or some other error code + */ +lsm_error lsm_trie_iter_next(void **out, lsm_trie_iterator *iter); + +/** + * Free the given iterator. 
+ */ +void lsm_trie_iter_free(lsm_trie_iterator *iter); + #endif diff --git a/lsm/src/_include/lsm/trie_internal.h b/lsm/src/_include/lsm/trie_internal.h index e3526d9..4259e82 100644 --- a/lsm/src/_include/lsm/trie_internal.h +++ b/lsm/src/_include/lsm/trie_internal.h @@ -5,14 +5,11 @@ #include "lsm/str_internal.h" #include "lsm/trie.h" -/** - * A node inside a trie structure - */ -typedef struct lsm_trie_node { +struct lsm_trie_node { lsm_bt bt; lsm_str skip; void *data; -} lsm_trie_node; +}; /** * Allocate and initialize a new trie node @@ -33,4 +30,13 @@ struct lsm_trie { uint64_t size; }; +struct lsm_trie_iterator { + lsm_trie_node *next; + struct { + lsm_bt_iterator *arr; + size_t len; + size_t cap; + } stack; +}; + #endif diff --git a/lsm/src/bt/lsm_bt.c b/lsm/src/bt/lsm_bt.c index 501dbf8..35c9bb4 100644 --- a/lsm/src/bt/lsm_bt.c +++ b/lsm/src/bt/lsm_bt.c @@ -184,7 +184,7 @@ void lsm_bt_iter(lsm_bt_iterator *out, const lsm_bt *bt) { } } -bool lsm_bt_iter_next(const void **out, char *key_out, lsm_bt_iterator *iter) { +bool lsm_bt_iter_next(void **out, char *key_out, lsm_bt_iterator *iter) { if (iter->next == NULL) { return false; } diff --git a/lsm/src/trie/lsm_trie_iter.c b/lsm/src/trie/lsm_trie_iter.c new file mode 100644 index 0000000..caadb10 --- /dev/null +++ b/lsm/src/trie/lsm_trie_iter.c @@ -0,0 +1,117 @@ +#include "lsm.h" +#include "lsm/trie_internal.h" + +/** + * Advance the iterator so that `next` now points to a node containing data. If + * no more data nodes are found, `next` is set to NULL. The initial value of + * `next` is ignored. 
+ */ +lsm_error lsm_trie_iter_next_data_node(lsm_trie_iterator *iter) { + do { + // Traverse back up the stack until we reach either the end of the subtree, + // or an iterator that hasn't been exhausted yet + while ((iter->stack.len > 0) && + !lsm_bt_iter_next((void **)&iter->next, NULL, + &iter->stack.arr[iter->stack.len - 1])) { + iter->stack.len--; // pop the exhausted iterator + } + + if (iter->stack.len == 0) { + iter->next = NULL; // stack is empty: no data nodes remain + + return lsm_error_ok; + } + + // Add node's subtree to the stack + if (iter->stack.len == iter->stack.cap) { + lsm_bt_iterator *arr = realloc( + iter->stack.arr, iter->stack.cap * 2 * sizeof(lsm_bt_iterator)); + + if (arr == NULL) { + return lsm_error_failed_alloc; // iter->stack.arr still holds the old array here + } + + iter->stack.arr = arr; + iter->stack.cap *= 2; + } + + lsm_bt_iter(&iter->stack.arr[iter->stack.len], &iter->next->bt); + iter->stack.len++; // entries of iter->next->bt are pulled on the next pass + + } while (iter->next->data == NULL); // repeat until the visited node carries data + + return lsm_error_ok; +} + +lsm_error lsm_trie_iter(lsm_trie_iterator **out, const lsm_trie *trie, + const lsm_str *prefix) { + uint64_t prefix_len = prefix == NULL ? 
0 : lsm_str_len(prefix); + + uint64_t index = 0; + lsm_trie_node *node = trie->root; + + // Simplified traversal down the trie to find the root of the subtree + // containing all nodes with the given prefix + while ((index < prefix_len) && (node != NULL)) { + char c = lsm_str_char(prefix, index); + lsm_error res = lsm_bt_search((void **)&node, &node->bt, c); + + switch (res) { + case lsm_error_ok: + index += 1 + lsm_str_len(&node->skip); // advances past the skip string without comparing it to the prefix + break; + case lsm_error_not_found: + node = NULL; + break; + default: + return res; + }; + } + + lsm_trie_iterator *iter = calloc(1, sizeof(lsm_trie_iterator)); + + if (iter == NULL) { + return lsm_error_failed_alloc; + } + + iter->next = node; + + if (node != NULL) { + lsm_bt_iterator *arr = malloc(sizeof(lsm_bt_iterator)); + + if (arr == NULL) { + free(iter); + + return lsm_error_failed_alloc; + } + + iter->stack.arr = arr; + iter->stack.cap = 1; + iter->stack.len = 1; + + lsm_bt_iter(&iter->stack.arr[0], &node->bt); + + if (node->data == NULL) { + LSM_RES(lsm_trie_iter_next_data_node(iter)); // NOTE(review): if LSM_RES returns early on error, iter and arr are not freed on that path -- confirm against the macro + } + } + + *out = iter; + + return lsm_error_ok; +} + +lsm_error lsm_trie_iter_next(void **out, lsm_trie_iterator *iter) { + if (iter->next == NULL) { + return lsm_error_done; + } + + *out = iter->next->data; // hand out the current data before advancing + + return lsm_trie_iter_next_data_node(iter); +} + +void lsm_trie_iter_free(lsm_trie_iterator *iter) { + free(iter->stack.arr); // iter is dereferenced here, so it must not be NULL + free(iter); +} diff --git a/lsm/test/bt/bt.c b/lsm/test/bt/bt.c index 908f35a..9206e1f 100644 --- a/lsm/test/bt/bt.c +++ b/lsm/test/bt/bt.c @@ -120,7 +120,7 @@ void test_iter() { lsm_bt_iter(&iter, bt); char key; - const void *data; + void *data; size_t i = 0; while (lsm_bt_iter_next(&data, &key, &iter)) { diff --git a/lsm/test/trie/trie.c b/lsm/test/trie/trie.c index db3e6ee..a1e2340 100644 --- a/lsm/test/trie/trie.c +++ b/lsm/test/trie/trie.c @@ -25,8 +25,64 @@ void test_insert_one() { TEST_CHECK(data == (void *)1); } +void test_iter() { + TRIE_INIT(); + + lsm_str *s; + + lsm_str_init_copy(&s, "hello"); 
+ TEST_CHECK(lsm_trie_insert(trie, s, (void *)1) == lsm_error_ok); + + lsm_str_init_copy(&s, "hella"); + TEST_CHECK(lsm_trie_insert(trie, s, (void *)2) == lsm_error_ok); + + lsm_str_init_copy(&s, "hel"); + TEST_CHECK(lsm_trie_insert(trie, s, (void *)3) == lsm_error_ok); + + lsm_str_init_copy(&s, "wow"); + TEST_CHECK(lsm_trie_insert(trie, s, (void *)4) == lsm_error_ok); + + lsm_str_init_copy(&s, "hel"); + + lsm_trie_iterator *iter; + TEST_CHECK(lsm_trie_iter(&iter, trie, s) == lsm_error_ok); + + void *data; + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok); + TEST_CHECK_(data == (void *)3, "%p == %p", data, (void *)3); + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok); + TEST_CHECK_(data == (void *)2, "%p == %p", data, (void *)2); + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok); + TEST_CHECK_(data == (void *)1, "%p == %p", data, (void *)1); + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_done); + lsm_trie_iter_free(iter); + + // Test full trie iterator + TEST_CHECK(lsm_trie_iter(&iter, trie, NULL) == lsm_error_ok); + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok); + TEST_CHECK_(data == (void *)3, "%p == %p", data, (void *)3); + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok); + TEST_CHECK_(data == (void *)2, "%p == %p", data, (void *)2); + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok); + TEST_CHECK_(data == (void *)1, "%p == %p", data, (void *)1); + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok); + TEST_CHECK_(data == (void *)4, "%p == %p", data, (void *)4); + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_done); + lsm_trie_iter_free(iter); +} + TEST_LIST = { { "trie init", test_init }, { "trie insert one", test_insert_one }, + { "trie iter", test_iter }, { NULL, NULL } };