diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a64beb..11ae703 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Configurable multithreading using `epoll` * Landerctl * `-c` flag to use custom config file (useful for testing) +* LSM + * Binary tree iterators + * Trie iterators ## [0.2.0](https://git.rustybever.be/Chewing_Bever/lander/src/tag/0.2.0) diff --git a/lsm/include/lsm.h b/lsm/include/lsm.h index 0a1639f..8a86f40 100644 --- a/lsm/include/lsm.h +++ b/lsm/include/lsm.h @@ -27,6 +27,7 @@ typedef enum lsm_error { lsm_error_null_value = 4, lsm_error_failed_io = 5, lsm_error_lock_busy = 6, + lsm_error_done = 7, } lsm_error; /*typedef struct lsm_string { */ diff --git a/lsm/include/lsm/bt.h b/lsm/include/lsm/bt.h index 581288f..6d79f4f 100644 --- a/lsm/include/lsm/bt.h +++ b/lsm/include/lsm/bt.h @@ -1,6 +1,8 @@ #ifndef LSM_BT #define LSM_BT +#include <stdbool.h> + #include "lsm.h" /** @@ -8,6 +10,11 @@ */ typedef struct lsm_bt lsm_bt; +/** + * A node inside an `lsm_bt` binary tree. + */ +typedef struct lsm_bt_node lsm_bt_node; + /** * Initialize a new binary tree * @@ -68,4 +75,33 @@ lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key); */ lsm_error lsm_bt_replace(void **out, lsm_bt *bt, char key, void *data); +/** + * Struct representing an in-flight iterator over the binary tree + */ +typedef struct lsm_bt_iterator { + lsm_bt_node *next; +} lsm_bt_iterator; + +/** + * Initialize the given iterator for the binary tree. + * + * The iterator is explicitly allowed to be allocated by the user, as these are + * commonly used inside functions where they can simply be stored on the stack. + * + * @param out iterator to initialize + * @param bt binary tree to iterate + */ +void lsm_bt_iter(lsm_bt_iterator *out, const lsm_bt *bt); + +/** + * Advance the iterator to the next element. 
+ * + * @param out where to store pointer to data; ignored if NULL + * @param key_out where to store key; ignored if NULL + * @param iter iterator to advance + * @return true if a new entry was returned, false if the iterator has no more + * entries to return + */ +bool lsm_bt_iter_next(void **out, char *key_out, lsm_bt_iterator *iter); + #endif diff --git a/lsm/include/lsm/trie.h b/lsm/include/lsm/trie.h index 58db0b6..17a9df5 100644 --- a/lsm/include/lsm/trie.h +++ b/lsm/include/lsm/trie.h @@ -1,7 +1,10 @@ #ifndef LSM_TRIE #define LSM_TRIE +#include + #include "lsm.h" +#include "lsm/bt.h" #include "lsm/str.h" /** @@ -9,6 +12,11 @@ */ typedef struct lsm_trie lsm_trie; +/** + * A node inside an `lsm_trie` trie + */ +typedef struct lsm_trie_node lsm_trie_node; + /** * Initialize a new trie. * @@ -57,4 +65,36 @@ lsm_error lsm_trie_remove(void **out, lsm_trie *trie, const lsm_str *key); */ uint64_t lsm_trie_size(const lsm_trie *trie); +/** + * Represents an in-flight iterator over a trie + */ +typedef struct lsm_trie_iterator lsm_trie_iterator; + +/** + * Initialize an iterator to iterate over all entries with keys starting + * with the given prefix. + * + * @param out pointer to store iterator pointer in + * @param trie trie to iterate + * @param prefix prefix of the keys; a zero-length string means iterating over + * the entire trie; NULL is interpreted as a zero-length string + */ +lsm_error lsm_trie_iter(lsm_trie_iterator **out, const lsm_trie *trie, + const lsm_str *prefix); + +/** + * Advance the given iterator. + * + * @param out pointer to store next data pointer in + * @param iter iterator to advance + * @return `lsm_error_ok` if next element has been returned, `lsm_error_done` if + * no more elements are present, or some other error code + */ +lsm_error lsm_trie_iter_next(void **out, lsm_trie_iterator *iter); + +/** + * Free the given iterator. 
+ */ +void lsm_trie_iter_free(lsm_trie_iterator *iter); + #endif diff --git a/lsm/src/_include/lsm/bt_internal.h b/lsm/src/_include/lsm/bt_internal.h index 4b55771..ed353a6 100644 --- a/lsm/src/_include/lsm/bt_internal.h +++ b/lsm/src/_include/lsm/bt_internal.h @@ -9,12 +9,13 @@ /** * Node inside a binary tree */ -typedef struct lsm_bt_node { +struct lsm_bt_node { struct lsm_bt_node *left; struct lsm_bt_node *right; + struct lsm_bt_node *parent; void *data; char key; -} lsm_bt_node; +}; /** * Initialize a new binary tree node diff --git a/lsm/src/_include/lsm/trie_internal.h b/lsm/src/_include/lsm/trie_internal.h index e3526d9..4259e82 100644 --- a/lsm/src/_include/lsm/trie_internal.h +++ b/lsm/src/_include/lsm/trie_internal.h @@ -5,14 +5,11 @@ #include "lsm/str_internal.h" #include "lsm/trie.h" -/** - * A node inside a trie structure - */ -typedef struct lsm_trie_node { +struct lsm_trie_node { lsm_bt bt; lsm_str skip; void *data; -} lsm_trie_node; +}; /** * Allocate and initialize a new trie node @@ -33,4 +30,13 @@ struct lsm_trie { uint64_t size; }; +struct lsm_trie_iterator { + lsm_trie_node *next; + struct { + lsm_bt_iterator *arr; + size_t len; + size_t cap; + } stack; +}; + #endif diff --git a/lsm/src/bt/lsm_bt.c b/lsm/src/bt/lsm_bt.c index c09fa01..35c9bb4 100644 --- a/lsm/src/bt/lsm_bt.c +++ b/lsm/src/bt/lsm_bt.c @@ -1,5 +1,6 @@ #include +#include "lsm/bt.h" #include "lsm/bt_internal.h" lsm_error lsm_bt_node_init(lsm_bt_node **ptr, const char key, void *data) { @@ -60,10 +61,12 @@ void lsm_bt_free(lsm_bt *bt) { uint64_t lsm_bt_size(const lsm_bt *bt) { return bt->size; } lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data) { + lsm_bt_node *parent = NULL; lsm_bt_node **dest = &bt->root; // Traverse down the tree until we reach the new point to insert our node while ((*dest != NULL) && ((*dest)->key != key)) { + parent = *dest; dest = key < (*dest)->key ? 
&(*dest)->left : &(*dest)->right; } @@ -71,9 +74,8 @@ lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data) { return lsm_error_already_present; } - if (lsm_bt_node_init(dest, key, data) != lsm_error_ok) { - return lsm_error_failed_alloc; - } + LSM_RES(lsm_bt_node_init(dest, key, data)); + (*dest)->parent = parent; bt->size++; @@ -125,13 +127,27 @@ lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key) { (*dest)->data = (*succ)->data; lsm_bt_node *succ_replacement = (*succ)->right; + lsm_bt_node *parent = (*succ)->parent; + lsm_bt_node_free(*succ); + *succ = succ_replacement; + + if (*succ != NULL) { + (*succ)->parent = parent; + } } else { lsm_bt_node *replacement = (*dest)->left != NULL ? (*dest)->left : (*dest)->right; + lsm_bt_node *parent = (*dest)->parent; + lsm_bt_node_free(*dest); + *dest = replacement; + + if (*dest != NULL) { + (*dest)->parent = parent; + } } return lsm_error_ok; @@ -156,3 +172,45 @@ lsm_error lsm_bt_replace(void **out, lsm_bt *bt, char key, void *data) { return lsm_error_ok; } + +void lsm_bt_iter(lsm_bt_iterator *out, const lsm_bt *bt) { + out->next = bt->root; + + if (bt->root != NULL) { + // Initialize the iterator to the smallest element in the tree + while (out->next->left != NULL) { + out->next = out->next->left; + } + } +} + +bool lsm_bt_iter_next(void **out, char *key_out, lsm_bt_iterator *iter) { + if (iter->next == NULL) { + return false; + } + + if (out != NULL) { + *out = iter->next->data; + } + + if (key_out != NULL) { + *key_out = iter->next->key; + } + + if (iter->next->right != NULL) { + iter->next = iter->next->right; + + while (iter->next->left != NULL) { + iter->next = iter->next->left; + } + } else { + while ((iter->next->parent != NULL) && + (iter->next->parent->right == iter->next)) { + iter->next = iter->next->parent; + } + + iter->next = iter->next->parent; + } + + return true; +} diff --git a/lsm/src/trie/lsm_trie_iter.c b/lsm/src/trie/lsm_trie_iter.c new file mode 100644 index 0000000..caadb10 --- /dev/null 
+++ b/lsm/src/trie/lsm_trie_iter.c @@ -0,0 +1,117 @@ +#include "lsm.h" +#include "lsm/trie_internal.h" + +/** + * Advance the iterator so that `next` now points to a node containing data. If + * no more data nodes are found, `next` is set to NULL. The initial value of + * `next` is ignored. + */ +lsm_error lsm_trie_iter_next_data_node(lsm_trie_iterator *iter) { + do { + // Traverse back up the stack until we reach either the end of the subtree, + // or an iterator that hasn't been exhausted yet + while ((iter->stack.len > 0) && + !lsm_bt_iter_next((void **)&iter->next, NULL, + &iter->stack.arr[iter->stack.len - 1])) { + iter->stack.len--; + } + + if (iter->stack.len == 0) { + iter->next = NULL; + + return lsm_error_ok; + } + + // Add node's subtree to the stack + if (iter->stack.len == iter->stack.cap) { + lsm_bt_iterator *arr = realloc( + iter->stack.arr, iter->stack.cap * 2 * sizeof(lsm_bt_iterator)); + + if (arr == NULL) { + return lsm_error_failed_alloc; + } + + iter->stack.arr = arr; + iter->stack.cap *= 2; + } + + lsm_bt_iter(&iter->stack.arr[iter->stack.len], &iter->next->bt); + iter->stack.len++; + + } while (iter->next->data == NULL); + + return lsm_error_ok; +} + +lsm_error lsm_trie_iter(lsm_trie_iterator **out, const lsm_trie *trie, + const lsm_str *prefix) { + uint64_t prefix_len = prefix == NULL ? 
0 : lsm_str_len(prefix); + + uint64_t index = 0; + lsm_trie_node *node = trie->root; + + // Simplified traversal down the trie to find the root of the subtree + // containing all nodes with the given prefix + while ((index < prefix_len) && (node != NULL)) { + char c = lsm_str_char(prefix, index); + lsm_error res = lsm_bt_search((void **)&node, &node->bt, c); + + switch (res) { + case lsm_error_ok: + index += 1 + lsm_str_len(&node->skip); + break; + case lsm_error_not_found: + node = NULL; + break; + default: + return res; + }; + } + + lsm_trie_iterator *iter = calloc(1, sizeof(lsm_trie_iterator)); + + if (iter == NULL) { + return lsm_error_failed_alloc; + } + + iter->next = node; + + if (node != NULL) { + lsm_bt_iterator *arr = malloc(sizeof(lsm_bt_iterator)); + + if (arr == NULL) { + free(iter); + + return lsm_error_failed_alloc; + } + + iter->stack.arr = arr; + iter->stack.cap = 1; + iter->stack.len = 1; + + lsm_bt_iter(&iter->stack.arr[0], &node->bt); + + if (node->data == NULL) { + LSM_RES(lsm_trie_iter_next_data_node(iter)); + } + } + + *out = iter; + + return lsm_error_ok; +} + +lsm_error lsm_trie_iter_next(void **out, lsm_trie_iterator *iter) { + if (iter->next == NULL) { + return lsm_error_done; + } + + *out = iter->next->data; + + return lsm_trie_iter_next_data_node(iter); +} + +void lsm_trie_iter_free(lsm_trie_iterator *iter) { + free(iter->stack.arr); + free(iter); +} diff --git a/lsm/test/bt/bt.c b/lsm/test/bt/bt.c index fdff839..9206e1f 100644 --- a/lsm/test/bt/bt.c +++ b/lsm/test/bt/bt.c @@ -104,6 +104,33 @@ void test_remove_multiple() { lsm_bt_free(bt); } +void test_iter() { + char chars[] = "falcoep"; + size_t char_count = sizeof(chars) / sizeof(char) - 1; + + char sorted_chars[] = "aceflop"; + + BT_INIT(); + + for (size_t i = 0; i < char_count; i++) { + TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_ok); + } + + lsm_bt_iterator iter; + lsm_bt_iter(&iter, bt); + + char key; + void *data; + size_t i = 0; + + while 
(lsm_bt_iter_next(&data, &key, &iter)) { + TEST_CHECK_(key == sorted_chars[i], "%c == %c", key, sorted_chars[i]); + i++; + } + + TEST_CHECK(i == char_count); +} + TEST_LIST = { { "bt init", test_init }, { "bt insert first", test_insert_first }, @@ -111,5 +138,6 @@ TEST_LIST = { { "bt insert multiple", test_insert_multiple }, { "bt remove root", test_remove_root }, { "bt remove multiple", test_remove_multiple }, + { "bt iter", test_iter }, { NULL, NULL } }; diff --git a/lsm/test/trie/trie.c b/lsm/test/trie/trie.c index db3e6ee..a1e2340 100644 --- a/lsm/test/trie/trie.c +++ b/lsm/test/trie/trie.c @@ -25,8 +25,64 @@ void test_insert_one() { TEST_CHECK(data == (void *)1); } +void test_iter() { + TRIE_INIT(); + + lsm_str *s; + + lsm_str_init_copy(&s, "hello"); + TEST_CHECK(lsm_trie_insert(trie, s, (void *)1) == lsm_error_ok); + + lsm_str_init_copy(&s, "hella"); + TEST_CHECK(lsm_trie_insert(trie, s, (void *)2) == lsm_error_ok); + + lsm_str_init_copy(&s, "hel"); + TEST_CHECK(lsm_trie_insert(trie, s, (void *)3) == lsm_error_ok); + + lsm_str_init_copy(&s, "wow"); + TEST_CHECK(lsm_trie_insert(trie, s, (void *)4) == lsm_error_ok); + + lsm_str_init_copy(&s, "hel"); + + lsm_trie_iterator *iter; + TEST_CHECK(lsm_trie_iter(&iter, trie, s) == lsm_error_ok); + + void *data; + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok); + TEST_CHECK_(data == (void *)3, "%p == %p", data, (void *)3); + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok); + TEST_CHECK_(data == (void *)2, "%p == %p", data, (void *)2); + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok); + TEST_CHECK_(data == (void *)1, "%p == %p", data, (void *)1); + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_done); + lsm_trie_iter_free(iter); + + // Test full trie iterator + TEST_CHECK(lsm_trie_iter(&iter, trie, NULL) == lsm_error_ok); + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok); + TEST_CHECK_(data == (void *)3, "%p == %p", data, (void *)3); + + 
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok); + TEST_CHECK_(data == (void *)2, "%p == %p", data, (void *)2); + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok); + TEST_CHECK_(data == (void *)1, "%p == %p", data, (void *)1); + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok); + TEST_CHECK_(data == (void *)4, "%p == %p", data, (void *)4); + + TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_done); + lsm_trie_iter_free(iter); +} + TEST_LIST = { { "trie init", test_init }, { "trie insert one", test_insert_one }, + { "trie iter", test_iter }, { NULL, NULL } };