Compare commits

..

2 Commits

Author SHA1 Message Date
Jef Roosens 83072d5441
feat(lsm): iterator over items in trie
ci/woodpecker/push/build Pipeline was successful Details
2023-12-23 10:06:02 +01:00
Jef Roosens 90826c3644
feat(lsm): binary tree iterators 2023-12-22 22:07:09 +01:00
10 changed files with 356 additions and 10 deletions

View File

@ -22,6 +22,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Configurable multithreading using `epoll`
* Landerctl
* `-c` flag to use custom config file (useful for testing)
* LSM
* Binary tree iterators
* Trie iterators
## [0.2.0](https://git.rustybever.be/Chewing_Bever/lander/src/tag/0.2.0)

View File

@ -27,6 +27,7 @@ typedef enum lsm_error {
lsm_error_null_value = 4,
lsm_error_failed_io = 5,
lsm_error_lock_busy = 6,
lsm_error_done = 7,
} lsm_error;
/*typedef struct lsm_string { */

View File

@ -1,6 +1,8 @@
#ifndef LSM_BT
#define LSM_BT
#include <stdbool.h>
#include "lsm.h"
/**
@ -8,6 +10,11 @@
*/
typedef struct lsm_bt lsm_bt;
/**
* A node inside an `lsm_bt` binary tree.
*/
typedef struct lsm_bt_node lsm_bt_node;
/**
* Initialize a new binary tree
*
@ -68,4 +75,33 @@ lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key);
*/
lsm_error lsm_bt_replace(void **out, lsm_bt *bt, char key, void *data);
/**
* Struct representing an in-flight iterator over the binary tree
*/
typedef struct lsm_bt_iterator {
// Node whose entry is handed out by the next call to `lsm_bt_iter_next`;
// NULL once the iterator has no more entries to return
lsm_bt_node *next;
} lsm_bt_iterator;
/**
* Initialize the given iterator for the binary tree.
*
* The iterator is explicitly allowed to be allocated by the user, as these are
* commonly used inside functions where they can simply be stored on the stack.
*
* After initialization the iterator is positioned on the leftmost node of the
* tree, or is already exhausted if the tree has no root.
*
* @param out iterator to initialize
* @param bt binary tree to iterate
*/
void lsm_bt_iter(lsm_bt_iterator *out, const lsm_bt *bt);
/**
* Advance the iterator to the next element.
*
* Elements are visited in-order, i.e. starting at the leftmost node and
* following the tree's left/right ordering of keys.
*
* @param out where to store pointer to data; ignored if NULL
* @param key_out where to store key; ignored if NULL
* @param iter iterator to advance
* @return true if a new entry was returned, false if the iterator has no more
* entries to return
*/
bool lsm_bt_iter_next(void **out, char *key_out, lsm_bt_iterator *iter);
#endif

View File

@ -1,7 +1,10 @@
#ifndef LSM_TRIE
#define LSM_TRIE
#include <stdlib.h>
#include "lsm.h"
#include "lsm/bt.h"
#include "lsm/str.h"
/**
@ -9,6 +12,11 @@
*/
typedef struct lsm_trie lsm_trie;
/**
* A node inside an `lsm_trie` trie
*/
typedef struct lsm_trie_node lsm_trie_node;
/**
* Initialize a new trie.
*
@ -57,4 +65,36 @@ lsm_error lsm_trie_remove(void **out, lsm_trie *trie, const lsm_str *key);
*/
uint64_t lsm_trie_size(const lsm_trie *trie);
/**
* Represents an in-flight iterator over a trie
*/
typedef struct lsm_trie_iterator lsm_trie_iterator;
/**
* Initialize an iterator to iterate over all entries with keys starting
* with the given prefix.
*
* The iterator is heap-allocated by this function; release it with
* `lsm_trie_iter_free` once it is no longer needed.
*
* @param out pointer to store iterator pointer in
* @param trie trie to iterate
* @param prefix prefix of the keys; a zero-length string means iterating over
* the entire trie; NULL is interpreted as a zero-length string
* @return `lsm_error_ok` if the iterator was created,
* `lsm_error_failed_alloc` if memory could not be allocated, or some other
* error code
*/
lsm_error lsm_trie_iter(lsm_trie_iterator **out, const lsm_trie *trie,
const lsm_str *prefix);
/**
* Advance the given iterator.
*
* When `lsm_error_done` is returned, `out` is left untouched. The data pointer
* is stored before the iterator looks for the following entry, so `out` may
* already have been written when an error code other than `lsm_error_done` is
* returned.
*
* @param out pointer to store next data pointer in
* @param iter iterator to advance
* @return `lsm_error_ok` if next element has been returned, `lsm_error_done` if
* no more elements are present, or some other error code
*/
lsm_error lsm_trie_iter_next(void **out, lsm_trie_iterator *iter);
/**
* Free the given iterator.
*
* Releases both the iterator's internal stack and the iterator itself; the
* trie and the data stored in it are not touched.
*
* @param iter iterator to free; must not be used afterwards
*/
void lsm_trie_iter_free(lsm_trie_iterator *iter);
#endif

View File

@ -9,12 +9,13 @@
/**
* Node inside a binary tree
*/
typedef struct lsm_bt_node {
struct lsm_bt_node {
struct lsm_bt_node *left;
struct lsm_bt_node *right;
struct lsm_bt_node *parent;
void *data;
char key;
} lsm_bt_node;
};
/**
* Initialize a new binary tree node

View File

@ -5,14 +5,11 @@
#include "lsm/str_internal.h"
#include "lsm/trie.h"
/**
* A node inside a trie structure
*/
typedef struct lsm_trie_node {
struct lsm_trie_node {
lsm_bt bt;
lsm_str skip;
void *data;
} lsm_trie_node;
};
/**
* Allocate and initialize a new trie node
@ -33,4 +30,13 @@ struct lsm_trie {
uint64_t size;
};
/**
* State of an in-flight iterator over (a subtree of) a trie.
*/
struct lsm_trie_iterator {
// Node whose data is returned by the next call to `lsm_trie_iter_next`; NULL
// once the iterator is exhausted
lsm_trie_node *next;
// Growable stack of binary tree iterators, one per trie node on the path
// currently being explored
struct {
// Heap-allocated storage for the stack entries
lsm_bt_iterator *arr;
// Number of entries currently in use
size_t len;
// Number of entries `arr` has room for
size_t cap;
} stack;
};
#endif

View File

@ -1,5 +1,6 @@
#include <stdlib.h>
#include "lsm/bt.h"
#include "lsm/bt_internal.h"
lsm_error lsm_bt_node_init(lsm_bt_node **ptr, const char key, void *data) {
@ -60,10 +61,12 @@ void lsm_bt_free(lsm_bt *bt) {
/**
 * Report the value of the tree's `size` counter.
 *
 * @param bt binary tree to inspect
 * @return current value of `bt->size`
 */
uint64_t lsm_bt_size(const lsm_bt *bt) {
  const uint64_t count = bt->size;

  return count;
}
lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data) {
lsm_bt_node *parent = NULL;
lsm_bt_node **dest = &bt->root;
// Traverse down the tree until we reach the new point to insert our node
while ((*dest != NULL) && ((*dest)->key != key)) {
parent = *dest;
dest = key < (*dest)->key ? &(*dest)->left : &(*dest)->right;
}
@ -71,9 +74,8 @@ lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data) {
return lsm_error_already_present;
}
if (lsm_bt_node_init(dest, key, data) != lsm_error_ok) {
return lsm_error_failed_alloc;
}
LSM_RES(lsm_bt_node_init(dest, key, data));
(*dest)->parent = parent;
bt->size++;
@ -125,13 +127,27 @@ lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key) {
(*dest)->data = (*succ)->data;
lsm_bt_node *succ_replacement = (*succ)->right;
lsm_bt_node *parent = (*succ)->parent;
lsm_bt_node_free(*succ);
*succ = succ_replacement;
if (*succ != NULL) {
(*succ)->parent = parent;
}
} else {
lsm_bt_node *replacement =
(*dest)->left != NULL ? (*dest)->left : (*dest)->right;
lsm_bt_node *parent = (*dest)->parent;
lsm_bt_node_free(*dest);
*dest = replacement;
if (*dest != NULL) {
(*dest)->parent = parent;
}
}
return lsm_error_ok;
@ -156,3 +172,45 @@ lsm_error lsm_bt_replace(void **out, lsm_bt *bt, char key, void *data) {
return lsm_error_ok;
}
/**
 * Position the iterator on the leftmost node of the tree, which is the first
 * node an in-order walk visits. An empty tree yields an iterator whose `next`
 * is NULL.
 *
 * @param out iterator to initialize
 * @param bt binary tree to iterate
 */
void lsm_bt_iter(lsm_bt_iterator *out, const lsm_bt *bt) {
  lsm_bt_node *cursor = bt->root;

  // Keep descending to the left for as long as there is a left child
  while ((cursor != NULL) && (cursor->left != NULL)) {
    cursor = cursor->left;
  }

  out->next = cursor;
}
/**
 * Hand out the entry the iterator currently points to and move the iterator
 * to its in-order successor.
 *
 * @param out where to store the data pointer; ignored if NULL
 * @param key_out where to store the key; ignored if NULL
 * @param iter iterator to advance
 * @return true if an entry was handed out, false if the iterator is exhausted
 */
bool lsm_bt_iter_next(void **out, char *key_out, lsm_bt_iterator *iter) {
  lsm_bt_node *current = iter->next;

  if (current == NULL) {
    return false;
  }

  if (out != NULL) {
    *out = current->data;
  }

  if (key_out != NULL) {
    *key_out = current->key;
  }

  lsm_bt_node *successor;

  if (current->right != NULL) {
    // The successor is the leftmost node of the right subtree
    successor = current->right;

    while (successor->left != NULL) {
      successor = successor->left;
    }
  } else {
    // Climb until we leave a left subtree: that ancestor is the successor.
    // Running off the root leaves NULL, which marks the iterator as exhausted.
    lsm_bt_node *child = current;
    successor = current->parent;

    while ((successor != NULL) && (successor->right == child)) {
      child = successor;
      successor = successor->parent;
    }
  }

  iter->next = successor;

  return true;
}

View File

@ -0,0 +1,117 @@
#include "lsm.h"
#include "lsm/trie_internal.h"
/**
 * Advance the iterator so that `next` points to the following node that
 * contains data, descending into each visited node's children before moving on
 * to its siblings. If no more data nodes are found, `next` is set to NULL. The
 * initial value of `next` is ignored.
 *
 * @param iter iterator to advance
 * @return `lsm_error_ok` on success (including when the iterator becomes
 * exhausted), `lsm_error_failed_alloc` if the internal stack could not grow
 */
lsm_error lsm_trie_iter_next_data_node(lsm_trie_iterator *iter) {
  do {
    // Receive the child through a genuine `void *` object rather than casting
    // `&iter->next` to `void **`, which would write a `lsm_trie_node *`
    // through an incompatible pointer type
    void *child = NULL;

    // Traverse back up the stack until we reach either the end of the subtree,
    // or an iterator that hasn't been exhausted yet
    while ((iter->stack.len > 0) &&
           !lsm_bt_iter_next(&child, NULL,
                             &iter->stack.arr[iter->stack.len - 1])) {
      iter->stack.len--;
    }

    if (iter->stack.len == 0) {
      iter->next = NULL;

      return lsm_error_ok;
    }

    iter->next = child;

    // Add node's subtree to the stack, growing the stack first if it is full
    if (iter->stack.len == iter->stack.cap) {
      lsm_bt_iterator *arr = realloc(
          iter->stack.arr, iter->stack.cap * 2 * sizeof(lsm_bt_iterator));

      if (arr == NULL) {
        return lsm_error_failed_alloc;
      }

      iter->stack.arr = arr;
      iter->stack.cap *= 2;
    }

    lsm_bt_iter(&iter->stack.arr[iter->stack.len], &iter->next->bt);
    iter->stack.len++;
  } while (iter->next->data == NULL);

  return lsm_error_ok;
}
/**
 * Create an iterator over all entries whose key starts with `prefix`.
 *
 * The trie is walked down along `prefix`; the node reached this way is the
 * root of the subtree the iterator will cover. If no key matches the prefix,
 * an already-exhausted iterator is returned.
 *
 * @param out where to store the newly allocated iterator; only written on
 * success
 * @param trie trie to iterate
 * @param prefix prefix of the keys; NULL or a zero-length string selects the
 * entire trie
 * @return `lsm_error_ok` on success, `lsm_error_failed_alloc` if an allocation
 * failed, or the error reported by the binary tree lookup
 */
lsm_error lsm_trie_iter(lsm_trie_iterator **out, const lsm_trie *trie,
                        const lsm_str *prefix) {
  uint64_t prefix_len = prefix == NULL ? 0 : lsm_str_len(prefix);
  uint64_t index = 0;
  lsm_trie_node *node = trie->root;

  // Traverse down the trie to find the root of the subtree containing all
  // nodes with the given prefix
  while ((index < prefix_len) && (node != NULL)) {
    char c = lsm_str_char(prefix, index);
    void *child = NULL;
    lsm_error res = lsm_bt_search(&child, &node->bt, c);

    if (res == lsm_error_not_found) {
      node = NULL;
      break;
    }

    if (res != lsm_error_ok) {
      return res;
    }

    node = child;

    // The characters stored in the node's skip must agree with the prefix for
    // as far as the prefix reaches; otherwise no key below this node starts
    // with the prefix
    uint64_t skip_len = lsm_str_len(&node->skip);
    bool matches = true;

    for (uint64_t i = 0; (i < skip_len) && (index + 1 + i < prefix_len); i++) {
      if (lsm_str_char(&node->skip, i) != lsm_str_char(prefix, index + 1 + i)) {
        matches = false;
        break;
      }
    }

    if (!matches) {
      node = NULL;
      break;
    }

    index += 1 + skip_len;
  }

  lsm_trie_iterator *iter = calloc(1, sizeof(lsm_trie_iterator));

  if (iter == NULL) {
    return lsm_error_failed_alloc;
  }

  iter->next = node;

  if (node != NULL) {
    lsm_bt_iterator *arr = malloc(sizeof(lsm_bt_iterator));

    if (arr == NULL) {
      free(iter);

      return lsm_error_failed_alloc;
    }

    iter->stack.arr = arr;
    iter->stack.cap = 1;
    iter->stack.len = 1;

    lsm_bt_iter(&iter->stack.arr[0], &node->bt);

    // The subtree root itself might not hold data, in which case we move on
    // to the first node that does
    if (node->data == NULL) {
      lsm_error res = lsm_trie_iter_next_data_node(iter);

      if (res != lsm_error_ok) {
        // Don't leak the half-initialized iterator on failure
        free(iter->stack.arr);
        free(iter);

        return res;
      }
    }
  }

  *out = iter;

  return lsm_error_ok;
}
/**
 * Hand out the data of the entry the iterator currently points to and move
 * the iterator to the following entry.
 *
 * @param out where to store the data pointer; ignored if NULL. Left untouched
 * when `lsm_error_done` is returned.
 * @param iter iterator to advance
 * @return `lsm_error_ok` if an entry was handed out, `lsm_error_done` if the
 * iterator is exhausted, or the error encountered while looking for the
 * following entry (in which case `out` has already been written)
 */
lsm_error lsm_trie_iter_next(void **out, lsm_trie_iterator *iter) {
  if (iter->next == NULL) {
    return lsm_error_done;
  }

  // Mirror `lsm_bt_iter_next`: callers that only want to advance may pass NULL
  if (out != NULL) {
    *out = iter->next->data;
  }

  return lsm_trie_iter_next_data_node(iter);
}
/**
 * Release the iterator's internal stack and the iterator itself. The trie and
 * the data stored in it are not touched.
 *
 * @param iter iterator to free; NULL is accepted and ignored, like `free`
 */
void lsm_trie_iter_free(lsm_trie_iterator *iter) {
  if (iter == NULL) {
    return;
  }

  free(iter->stack.arr);
  free(iter);
}

View File

@ -104,6 +104,33 @@ void test_remove_multiple() {
lsm_bt_free(bt);
}
/**
 * Insert a handful of keys in arbitrary order and check that the iterator
 * hands them back in sorted order, each paired with the data it was inserted
 * with.
 */
void test_iter() {
  char chars[] = "falcoep";
  size_t char_count = sizeof(chars) / sizeof(char) - 1;
  char sorted_chars[] = "aceflop";

  BT_INIT();

  // Each key stores its 1-based position in `chars` as data
  for (size_t i = 0; i < char_count; i++) {
    TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_ok);
  }

  lsm_bt_iterator iter;
  lsm_bt_iter(&iter, bt);

  char key;
  void *data;
  size_t i = 0;

  while (lsm_bt_iter_next(&data, &key, &iter)) {
    // Never index past the expected keys, even if the iterator misbehaves and
    // yields too many entries; the count check below reports that case
    if (i < char_count) {
      TEST_CHECK_(key == sorted_chars[i], "%c == %c", key, sorted_chars[i]);

      // Recover the data this key was inserted with
      size_t expected = 0;

      for (size_t j = 0; j < char_count; j++) {
        if (chars[j] == key) {
          expected = j + 1;
        }
      }

      TEST_CHECK_(data == (void *)expected, "%p == %p", data,
                  (void *)expected);
    }

    i++;
  }

  TEST_CHECK(i == char_count);

  // Release the tree, as the other tests in this file do
  lsm_bt_free(bt);
}
TEST_LIST = {
{ "bt init", test_init },
{ "bt insert first", test_insert_first },
@ -111,5 +138,6 @@ TEST_LIST = {
{ "bt insert multiple", test_insert_multiple },
{ "bt remove root", test_remove_root },
{ "bt remove multiple", test_remove_multiple },
{ "bt iter", test_iter },
{ NULL, NULL }
};

View File

@ -25,8 +25,64 @@ void test_insert_one() {
TEST_CHECK(data == (void *)1);
}
/**
* Check prefix iteration and full iteration over a small trie.
*
* Four keys are inserted ("hello", "hella", "hel", "wow") with data 1..4. Both
* iterations expect a node's own data before that of its children, and
* children in the order the binary tree iterator yields them, hence
* "hel" (3), "hella" (2), "hello" (1) and finally "wow" (4).
*/
void test_iter() {
TRIE_INIT();
lsm_str *s;
lsm_str_init_copy(&s, "hello");
TEST_CHECK(lsm_trie_insert(trie, s, (void *)1) == lsm_error_ok);
lsm_str_init_copy(&s, "hella");
TEST_CHECK(lsm_trie_insert(trie, s, (void *)2) == lsm_error_ok);
lsm_str_init_copy(&s, "hel");
TEST_CHECK(lsm_trie_insert(trie, s, (void *)3) == lsm_error_ok);
lsm_str_init_copy(&s, "wow");
TEST_CHECK(lsm_trie_insert(trie, s, (void *)4) == lsm_error_ok);
// This string is only used as the iteration prefix; it is never inserted.
// NOTE(review): nothing in this function releases it (or the trie) afterwards
// -- confirm whether that is intentional for the test binary.
lsm_str_init_copy(&s, "hel");
lsm_trie_iterator *iter;
TEST_CHECK(lsm_trie_iter(&iter, trie, s) == lsm_error_ok);
void *data;
// Only the three keys starting with "hel" may show up; "wow" must not
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok);
TEST_CHECK_(data == (void *)3, "%p == %p", data, (void *)3);
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok);
TEST_CHECK_(data == (void *)2, "%p == %p", data, (void *)2);
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok);
TEST_CHECK_(data == (void *)1, "%p == %p", data, (void *)1);
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_done);
lsm_trie_iter_free(iter);
// Test full trie iterator
TEST_CHECK(lsm_trie_iter(&iter, trie, NULL) == lsm_error_ok);
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok);
TEST_CHECK_(data == (void *)3, "%p == %p", data, (void *)3);
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok);
TEST_CHECK_(data == (void *)2, "%p == %p", data, (void *)2);
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok);
TEST_CHECK_(data == (void *)1, "%p == %p", data, (void *)1);
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok);
TEST_CHECK_(data == (void *)4, "%p == %p", data, (void *)4);
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_done);
lsm_trie_iter_free(iter);
}
// Table of the trie tests in this file, each paired with the name it is
// reported under; the { NULL, NULL } entry closes the table
TEST_LIST = {
{ "trie init", test_init },
{ "trie insert one", test_insert_one },
{ "trie iter", test_iter },
{ NULL, NULL }
};