diff --git a/lsm/include/lsm.h b/lsm/include/lsm.h index 8ecb958..aa76826 100644 --- a/lsm/include/lsm.h +++ b/lsm/include/lsm.h @@ -9,8 +9,7 @@ typedef enum lsm_error { lsm_error_ok = 0, lsm_error_failed_alloc = 1, lsm_error_not_found = 2, - lsm_error_already_present = 3, - lsm_error_null_value = 4 + lsm_error_already_present = 3 } lsm_error; /*typedef struct lsm_string { */ diff --git a/lsm/include/lsm/bt.h b/lsm/include/lsm/bt.h index a0995a1..a2826b0 100644 --- a/lsm/include/lsm/bt.h +++ b/lsm/include/lsm/bt.h @@ -48,14 +48,4 @@ lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data); */ lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key); -/** - * Replace the data at an existing key with new data, returning the old. - * - * @param out address to write old data pointer to - * @param bt binary tree to replace in - * @param key key to replace at - * @param data new data to store - */ -lsm_error lsm_bt_replace(void **out, lsm_bt *bt, char key, void *data); - #endif diff --git a/lsm/include/lsm/str.h b/lsm/include/lsm/str.h index 346fd54..58930ec 100644 --- a/lsm/include/lsm/str.h +++ b/lsm/include/lsm/str.h @@ -34,15 +34,6 @@ void lsm_str_init_prealloc(lsm_str *str, char *s); */ lsm_error lsm_str_init(lsm_str **ptr, char *s); -/** - * Same as lsm_str_init, except it copies the original string instead of taking - * over ownership, leaving the original string untouched. - * - * @param ptr pointer to store newly allocated pointer - * @param s string to copy into lsm string - */ -lsm_error lsm_str_init_copy(lsm_str **ptr, char *s); - /** * Deallocate the existing internal string if needed and replace the lsm_str * with a string of length 0, wiping its contents. @@ -66,65 +57,4 @@ void lsm_str_free(lsm_str *str); */ uint64_t lsm_str_len(lsm_str *str); -/** - * Return a pointer to the string's underlying char array. Note that this array - * will *not* neccessarily be null-terminatd. - * - * @param str string to return pointer for - */ -const char *lsm_str_ptr(lsm_str *str); - -/** - * Returns the character at the specified position. - * - * @index index of character to return - */ -char lsm_str_char(lsm_str *str, uint64_t index); - -/** - * Take a substring and copy it to a provided string object. - * - * @param out string to store new substring in. The contents of this string will - * be replaced. - * @param str string to take substring from - * @param start inclusive start index for the substring. If this is greater than - * or equal to the string's length, out will be a zero-length string. - * @param end exclusive end index for the substring - */ -lsm_error lsm_str_substr(lsm_str *out, lsm_str *str, uint64_t start, - uint64_t end); - -/** - * Return the first index where s1 and s2 differ, starting at their respective - * offsets. If both strings are equal (or one is a prefix of the other), the - * result will be the length of the shortest string. The returned value is - * relative to the given offets. - * - * @param s1 string to compare - * @param s1_offset offset inside s1 to start comparing from - * @param s2 string to compare s1 to - * @param s2_offset offset inside s2 to start comparing from - */ -uint64_t lsm_str_cmp(lsm_str *s1, uint64_t s1_offset, lsm_str *s2, - uint64_t s2_offset); - -/** - * Truncate a string in-place. - * - * @param s string to truncate - * @param new_len new length of the string. If new_len is >= the original - * length, this function does nothing. - */ -lsm_error lsm_str_truncate(lsm_str *s, uint64_t new_len); - -/** - * Split s at the specified index, saving the second half the string in s2. - * - * @param s string to split - * @param s2 string to store second part of s - * @param index position to split string. If index is the length of s or - * greater, s2 will simply be an empty string. - */ -lsm_error lsm_str_split(lsm_str *s, lsm_str *s2, uint64_t index); - #endif diff --git a/lsm/src/_include/lsm/str_internal.h b/lsm/src/_include/lsm/str_internal.h index 03f5395..909a0df 100644 --- a/lsm/src/_include/lsm/str_internal.h +++ b/lsm/src/_include/lsm/str_internal.h @@ -8,7 +8,7 @@ struct lsm_str { uint64_t len; union { - char *ptr; + void *ptr; char val[8]; } data; }; diff --git a/lsm/src/_include/lsm/trie_internal.h b/lsm/src/_include/lsm/trie_internal.h index e3526d9..4fb7037 100644 --- a/lsm/src/_include/lsm/trie_internal.h +++ b/lsm/src/_include/lsm/trie_internal.h @@ -5,32 +5,10 @@ #include "lsm/str_internal.h" #include "lsm/trie.h" -/** - * A node inside a trie structure - */ typedef struct lsm_trie_node { lsm_bt bt; lsm_str skip; - void *data; + char c; } lsm_trie_node; -/** - * Allocate and initialize a new trie node - * - * @param ptr pointer to store new node pointer - */ -lsm_error lsm_trie_node_init(lsm_trie_node **ptr); - -/** - * Deallocate a trie node - * - * @param node node to deallocate - */ -void lsm_trie_node_free(lsm_trie_node *node); - -struct lsm_trie { - lsm_trie_node *root; - uint64_t size; -}; - #endif diff --git a/lsm/src/bt/lsm_bt.c b/lsm/src/bt/lsm_bt.c index d5b2895..da08cbd 100644 --- a/lsm/src/bt/lsm_bt.c +++ b/lsm/src/bt/lsm_bt.c @@ -130,20 +130,3 @@ lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key) { return lsm_error_ok; } - -lsm_error lsm_bt_replace(void **out, lsm_bt *bt, char key, void *data) { - lsm_bt_node *node = bt->root; - - while ((node != NULL) && (node->key != key)) { - node = key < node->key ? node->left : node->right; - } - - if (node == NULL) { - return lsm_error_not_found; - } - - *out = node->data; - node->data = data; - - return lsm_error_ok; -} diff --git a/lsm/src/str/lsm_str.c b/lsm/src/str/lsm_str.c index a33c700..38bce13 100644 --- a/lsm/src/str/lsm_str.c +++ b/lsm/src/str/lsm_str.c @@ -5,8 +5,6 @@ #include "lsm.h" #include "lsm/str_internal.h" -#define MIN(x, y) (((x) < (y)) ? (x) : (y)) - lsm_error lsm_str_init_zero(lsm_str **ptr) { lsm_str *str = calloc(1, sizeof(lsm_str)); @@ -44,33 +42,6 @@ lsm_error lsm_str_init(lsm_str **ptr, char *s) { return lsm_error_ok; } -lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) { - lsm_str *str = calloc(1, sizeof(lsm_str)); - - if (str == NULL) { - return lsm_error_failed_alloc; - } - - str->len = strlen(s); - - if (str->len <= 8) { - memcpy(str->data.val, s, str->len); - } else { - char *buf = malloc(str->len * sizeof(char)); - - if (buf == NULL) { - return lsm_error_failed_alloc; - } - - memcpy(buf, s, str->len); - str->data.ptr = buf; - } - - *ptr = str; - - return lsm_error_ok; -} - void lsm_str_zero(lsm_str *str) { if (str->len > 8) { free(str->data.ptr); @@ -88,100 +59,3 @@ void lsm_str_free(lsm_str *str) { } uint64_t lsm_str_len(lsm_str *str) { return str->len; } - -const char *lsm_str_ptr(lsm_str *str) { - if (str->len <= 8) { - return str->data.val; - } else { - return str->data.ptr; - } -} - -char lsm_str_char(lsm_str *str, uint64_t index) { - if (str->len <= 8) { - return str->data.val[index]; - } else { - return str->data.ptr[index]; - } -} - -lsm_error lsm_str_substr(lsm_str *out, lsm_str *str, uint64_t start, - uint64_t end) { - // A substring that starts past the string's length will have length 0 - uint64_t len = start < str->len ? end - start : 0; - const char *str_ptr = lsm_str_ptr(str); - - if (len <= 8) { - lsm_str_zero(out); - memcpy(out->data.val, &str_ptr[start], len); - } else { - char *buf = malloc(len * sizeof(char)); - - if (buf == NULL) { - return lsm_error_failed_alloc; - } - - memcpy(buf, &str_ptr[start], len); - - lsm_str_zero(out); - out->data.ptr = buf; - } - - out->len = len; - - return lsm_error_ok; -} - -uint64_t lsm_str_cmp(lsm_str *s1, uint64_t s1_offset, lsm_str *s2, - uint64_t s2_offset) { - uint64_t index = 0; - uint64_t max_len = MIN(s1->len - s1_offset, s2->len - s2_offset); - - while ((index < max_len) && (lsm_str_char(s1, s1_offset + index) == - lsm_str_char(s2, s2_offset + index))) { - index++; - } - - return index; -} - -lsm_error lsm_str_truncate(lsm_str *s, uint64_t new_len) { - if (new_len >= s->len) { - return lsm_error_ok; - } - - if (new_len <= 8) { - char *s_buf = s->data.ptr; - - memcpy(s->data.val, lsm_str_ptr(s), new_len); - - if (s->len > 8) { - free(s_buf); - } - } else { - char *buf = malloc(new_len * sizeof(char)); - - if (buf == NULL) { - return lsm_error_failed_alloc; - } - - memcpy(buf, s->data.ptr, new_len); - free(s->data.ptr); - - s->data.ptr = buf; - } - - s->len = new_len; - - return lsm_error_ok; -} - -lsm_error lsm_str_split(lsm_str *s, lsm_str *s2, uint64_t index) { - lsm_error res = lsm_str_substr(s2, s, index, s->len); - - if (res != lsm_error_ok) { - return res; - } - - return lsm_str_truncate(s, index); -} diff --git a/lsm/src/trie/lsm_trie.c b/lsm/src/trie/lsm_trie.c index 541f89f..568decb 100644 --- a/lsm/src/trie/lsm_trie.c +++ b/lsm/src/trie/lsm_trie.c @@ -1,178 +1 @@ -#include - -#include "lsm.h" #include "lsm/trie_internal.h" - -lsm_error lsm_trie_node_init(lsm_trie_node **ptr) { - lsm_trie_node *node = calloc(1, sizeof(lsm_trie_node)); - - if (node == NULL) { - return lsm_error_failed_alloc; - } - - *ptr = node; - - return lsm_error_ok; -} - -lsm_error lsm_trie_init(lsm_trie **ptr) { - lsm_trie *trie = calloc(1, sizeof(lsm_trie)); - - if (trie == NULL) { - return lsm_error_failed_alloc; - } - - lsm_trie_node *root; - lsm_error res = lsm_trie_node_init(&root); - - if (res != lsm_error_ok) { - return res; - } - - trie->root = root; - *ptr = trie; - - return lsm_error_ok; -} - -lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { - // NULL is not allowed as a data value, as it's used to indicate a lack of - // data - if (data == NULL) { - return lsm_error_null_value; - } - - uint64_t key_len = lsm_str_len(key); - - // Empty string is represented by the root - if (key_len == 0) { - if (trie->root->data == NULL) { - trie->root->data = data; - - return lsm_error_ok; - } else { - return lsm_error_already_present; - } - } - - uint64_t index = 0; - lsm_trie_node *node = trie->root; - lsm_trie_node *next_node; - lsm_error res; - - while (index < key_len) { - char c = lsm_str_char(key, index); - res = lsm_bt_search((void **)&next_node, &node->bt, c); - - // No child is present yet for this character, so we can insert the string - // here - if (res == lsm_error_not_found) { - lsm_trie_node *new_node; - res = lsm_trie_node_init(&new_node); - - if (res != lsm_error_ok) { - return res; - } - - new_node->data = data; - lsm_str_substr(&new_node->skip, key, index + 1, key_len); - - return lsm_bt_insert(&node->bt, c, new_node); - } - - index++; - - // We compare the remaining part of the key with the node's skip. If cmp is - // less than the length of the skip, we know they differ and the edge should - // be split. - uint64_t cmp = lsm_str_cmp(key, index, &next_node->skip, 0); - - if (cmp < lsm_str_len(&next_node->skip)) { - lsm_trie_node *split_node; - res = lsm_trie_node_init(&split_node); - - if (res != lsm_error_ok) { - return res; - } - - // split_node replaces the original node as the new child node - lsm_trie_node *bottom_node; - lsm_bt_replace((void **)&bottom_node, &node->bt, c, split_node); - - // The old child node now becomes the child of split_node - lsm_bt_insert(&split_node->bt, lsm_str_char(key, index + cmp), - bottom_node); - - // The new node splits the edge into two parts, so the new node will have - // the remaining part of the skip (minus the one character) as its skip - lsm_str_substr(&split_node->skip, &next_node->skip, cmp + 1, - lsm_str_len(&next_node->skip)); - - // The old node keeps the first part of the skip - lsm_str_truncate(&next_node->skip, cmp); - - next_node = split_node; - } - - node = next_node; - index += cmp; - } - - // This catches the edge case where the exact node for the string is already - // present in the trie - if (node->data != NULL) { - return lsm_error_already_present; - } - - node->data = data; - - return lsm_error_ok; -} - -lsm_error lsm_trie_search(void **data, lsm_trie *trie, lsm_str *key) { - uint64_t key_len = lsm_str_len(key); - - if (key_len == 0) { - if (trie->root->data != NULL) { - *data = trie->root->data; - - return lsm_error_ok; - } else { - return lsm_error_not_found; - } - } - - uint64_t index = 0; - lsm_trie_node *node = trie->root; - lsm_trie_node *next_node; - lsm_error res; - - while (index < key_len) { - char c = lsm_str_char(key, index); - res = lsm_bt_search((void **)&next_node, &node->bt, c); - - if (res != lsm_error_ok) { - return res; - } - - index++; - - uint64_t cmp = lsm_str_cmp(key, index, &next_node->skip, 0); - - // If we end in the middle of an edge, we definitely haven't found the node - if (cmp != lsm_str_len(&next_node->skip)) { - return lsm_error_not_found; - } - - node = next_node; - index += cmp; - } - - if (node->data == NULL) { - return lsm_error_not_found; - } - - *data = node->data; - - return lsm_error_ok; -} diff --git a/lsm/test/bt/bt.c b/lsm/test/bt/bt.c index f96cf99..1900305 100644 --- a/lsm/test/bt/bt.c +++ b/lsm/test/bt/bt.c @@ -99,11 +99,11 @@ void test_remove_multiple() { } TEST_LIST = { - { "bt init", test_init }, - { "bt insert first", test_insert_first }, - { "bt insert two", test_insert_two }, - { "bt insert multiple", test_insert_multiple }, - { "bt remove root", test_remove_root }, - { "bt remove multiple", test_remove_multiple }, + { "test init", test_init }, + { "test insert first", test_insert_first }, + { "test insert two", test_insert_two }, + { "test insert multiple", test_insert_multiple }, + { "test remove root", test_remove_root }, + { "test remove multiple", test_remove_multiple }, { NULL, NULL } }; diff --git a/lsm/test/trie/trie.c b/lsm/test/trie/trie.c deleted file mode 100644 index f3bf73b..0000000 --- a/lsm/test/trie/trie.c +++ /dev/null @@ -1,32 +0,0 @@ -#include "lsm.h" -#include "test.h" -#include "lsm/trie_internal.h" - -#define TRIE_INIT() \ - lsm_trie *trie; \ - TEST_CHECK(lsm_trie_init(&trie) == lsm_error_ok); \ - TEST_CHECK(trie != NULL) - -void test_init() { - TRIE_INIT(); - /* lsm_trie_free(trie); */ -} - -void test_insert_one() { - TRIE_INIT(); - - lsm_str *s; - lsm_str_init_copy(&s, "hello"); - TEST_CHECK(lsm_trie_insert(trie, s, (void *)1) == lsm_error_ok); - TEST_CHECK(lsm_trie_insert(trie, s, (void *)1) == lsm_error_already_present); - - void *data; - TEST_CHECK(lsm_trie_search(&data, trie, s) == lsm_error_ok); - TEST_CHECK(data == (void *)1); -} - -TEST_LIST = { - { "trie init", test_init }, - { "trie insert one", test_insert_one }, - { NULL, NULL } -};