feat(lsm): implement a simple trie remove
parent
682f422e3c
commit
6938c29725
|
@ -20,6 +20,16 @@ lsm_error lsm_bt_init(lsm_bt **ptr);
|
||||||
*/
|
*/
|
||||||
void lsm_bt_free(lsm_bt *bt);
|
void lsm_bt_free(lsm_bt *bt);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove the binary tree's entire contents, but keep the struct allocated.
|
||||||
|
*/
|
||||||
|
void lsm_bt_clear(lsm_bt *bt);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the number of entries currently stored in the binary tree
|
||||||
|
*/
|
||||||
|
uint64_t lsm_bt_size(lsm_bt *bt);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Search for the data stored behind the given key.
|
* Search for the data stored behind the given key.
|
||||||
*
|
*
|
||||||
|
|
|
@ -18,16 +18,16 @@ lsm_error lsm_bt_node_init(lsm_bt_node **ptr, const char key, void *data) {
|
||||||
|
|
||||||
void lsm_bt_node_free(lsm_bt_node *node) { free(node); }
|
void lsm_bt_node_free(lsm_bt_node *node) { free(node); }
|
||||||
|
|
||||||
void lsm_bt_node_free_recursive(lsm_bt_node *node) {
|
void lsm_bt_node_free_tree(lsm_bt_node *node) {
|
||||||
if (node->left != NULL) {
|
if (node->left != NULL) {
|
||||||
lsm_bt_node_free_recursive(node->left);
|
lsm_bt_node_free_tree(node->left);
|
||||||
|
lsm_bt_node_free(node->left);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (node->right != NULL) {
|
if (node->right != NULL) {
|
||||||
lsm_bt_node_free_recursive(node->right);
|
lsm_bt_node_free_tree(node->right);
|
||||||
|
lsm_bt_node_free(node->right);
|
||||||
}
|
}
|
||||||
|
|
||||||
lsm_bt_node_free(node);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
lsm_error lsm_bt_init(lsm_bt **ptr) {
|
lsm_error lsm_bt_init(lsm_bt **ptr) {
|
||||||
|
@ -42,14 +42,23 @@ lsm_error lsm_bt_init(lsm_bt **ptr) {
|
||||||
return lsm_error_ok;
|
return lsm_error_ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
void lsm_bt_free(lsm_bt *bt) {
|
void lsm_bt_clear(lsm_bt *bt) {
|
||||||
if (bt->root != NULL) {
|
if (bt->root != NULL) {
|
||||||
lsm_bt_node_free_recursive(bt->root);
|
lsm_bt_node_free_tree(bt->root);
|
||||||
|
lsm_bt_node_free(bt->root);
|
||||||
|
|
||||||
|
bt->root = NULL;
|
||||||
|
bt->size = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void lsm_bt_free(lsm_bt *bt) {
|
||||||
|
lsm_bt_clear(bt);
|
||||||
free(bt);
|
free(bt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Report the size recorded for the binary tree.
 *
 * @param bt tree to inspect; it is dereferenced without a NULL check
 * @return the current value of the tree's `size` field
 */
uint64_t lsm_bt_size(lsm_bt *bt) {
  const uint64_t count = bt->size;

  return count;
}
|
||||||
|
|
||||||
lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data) {
|
lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data) {
|
||||||
lsm_bt_node **dest = &bt->root;
|
lsm_bt_node **dest = &bt->root;
|
||||||
|
|
||||||
|
@ -85,7 +94,9 @@ lsm_error lsm_bt_search(void **out, lsm_bt *bt, char key) {
|
||||||
return lsm_error_not_found;
|
return lsm_error_not_found;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (out != NULL) {
|
||||||
*out = node->data;
|
*out = node->data;
|
||||||
|
}
|
||||||
|
|
||||||
return lsm_error_ok;
|
return lsm_error_ok;
|
||||||
}
|
}
|
||||||
|
@ -105,7 +116,9 @@ lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key) {
|
||||||
return lsm_error_not_found;
|
return lsm_error_not_found;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (out != NULL) {
|
||||||
*out = (*dest)->data;
|
*out = (*dest)->data;
|
||||||
|
}
|
||||||
bt->size--;
|
bt->size--;
|
||||||
|
|
||||||
if (((*dest)->left != NULL) && ((*dest)->right != NULL)) {
|
if (((*dest)->left != NULL) && ((*dest)->right != NULL)) {
|
||||||
|
@ -142,7 +155,10 @@ lsm_error lsm_bt_replace(void **out, lsm_bt *bt, char key, void *data) {
|
||||||
return lsm_error_not_found;
|
return lsm_error_not_found;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (out != NULL) {
|
||||||
*out = node->data;
|
*out = node->data;
|
||||||
|
}
|
||||||
|
|
||||||
node->data = data;
|
node->data = data;
|
||||||
|
|
||||||
return lsm_error_ok;
|
return lsm_error_ok;
|
||||||
|
|
|
@ -16,6 +16,12 @@ lsm_error lsm_trie_node_init(lsm_trie_node **ptr) {
|
||||||
return lsm_error_ok;
|
return lsm_error_ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Release a single trie node.
 *
 * Empties the node's binary tree with lsm_bt_clear, hands its skip string to
 * lsm_str_zero and finally frees the node allocation itself.
 *
 * NOTE(review): the visible lsm_bt_clear only frees the binary tree's own
 * nodes, not the data pointers they carry, so child trie nodes are presumably
 * expected to be gone already -- confirm against callers.
 *
 * @param node node to release; it is dereferenced without a NULL check
 */
void lsm_trie_node_free(lsm_trie_node *node) {
  lsm_bt *children = &node->bt;
  lsm_str *edge = &node->skip;

  lsm_bt_clear(children);
  lsm_str_zero(edge);

  free(node);
}
|
||||||
|
|
||||||
lsm_error lsm_trie_init(lsm_trie **ptr) {
|
lsm_error lsm_trie_init(lsm_trie **ptr) {
|
||||||
lsm_trie *trie = calloc(1, sizeof(lsm_trie));
|
lsm_trie *trie = calloc(1, sizeof(lsm_trie));
|
||||||
|
|
||||||
|
@ -142,12 +148,14 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) {
|
||||||
return lsm_error_ok;
|
return lsm_error_ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
lsm_error lsm_trie_search(void **data, lsm_trie *trie, lsm_str *key) {
|
lsm_error lsm_trie_search(void **out, lsm_trie *trie, lsm_str *key) {
|
||||||
uint64_t key_len = lsm_str_len(key);
|
uint64_t key_len = lsm_str_len(key);
|
||||||
|
|
||||||
if (key_len == 0) {
|
if (key_len == 0) {
|
||||||
if (trie->root->data != NULL) {
|
if (trie->root->data != NULL) {
|
||||||
*data = trie->root->data;
|
if (out != NULL) {
|
||||||
|
*out = trie->root->data;
|
||||||
|
}
|
||||||
|
|
||||||
return lsm_error_ok;
|
return lsm_error_ok;
|
||||||
} else {
|
} else {
|
||||||
|
@ -185,7 +193,80 @@ lsm_error lsm_trie_search(void **data, lsm_trie *trie, lsm_str *key) {
|
||||||
return lsm_error_not_found;
|
return lsm_error_not_found;
|
||||||
}
|
}
|
||||||
|
|
||||||
*data = node->data;
|
if (out != NULL) {
|
||||||
|
*out = node->data;
|
||||||
|
}
|
||||||
|
|
||||||
|
return lsm_error_ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Remove the entry stored under the given key from the trie.
 *
 * This is a simple removal: the matching node loses its data, and the node
 * itself is only unlinked and freed when it has no children of its own.
 * Ancestors are never merged or pruned.
 *
 * @param data if not NULL, receives the data pointer that was stored under
 * the key; it is left untouched when nothing is removed
 * @param trie trie to remove the entry from
 * @param key key of the entry to remove; the empty key refers to the root
 * node
 * @return lsm_error_ok if an entry was removed, lsm_error_not_found if no
 * entry is stored under the key, or the error reported by lsm_bt_search while
 * walking down the trie
 */
lsm_error lsm_trie_remove(void **data, lsm_trie *trie, lsm_str *key) {
  uint64_t key_len = lsm_str_len(key);

  // The empty key lives in the root node, which is never unlinked
  if (key_len == 0) {
    if (trie->root->data == NULL) {
      return lsm_error_not_found;
    }

    if (data != NULL) {
      *data = trie->root->data;
    }

    trie->root->data = NULL;
    trie->size--;

    return lsm_error_ok;
  }

  uint64_t index = 0;
  lsm_trie_node *parent = trie->root;
  lsm_trie_node *child = NULL;
  char c = 0;

  // key_len > 0 here, so the loop body runs at least once and child is set
  // before it is inspected below
  while (index < key_len) {
    c = lsm_str_char(key, index);

    lsm_error res = lsm_bt_search((void **)&child, &parent->bt, c);

    if (res != lsm_error_ok) {
      return res;
    }

    index++;

    uint64_t cmp = lsm_str_cmp(key, index, &child->skip, 0);

    // If we end in the middle of an edge, we definitely haven't found the node
    if (cmp != lsm_str_len(&child->skip)) {
      return lsm_error_not_found;
    }

    index += cmp;

    // Only descend while key characters remain, so that after the loop parent
    // and c still identify the edge leading to child; the removal needs both
    if (index < key_len) {
      parent = child;
    }
  }

  // A node without data is only an intermediate node, not a stored entry
  if (child->data == NULL) {
    return lsm_error_not_found;
  }

  // Child is the node we wish to delete
  if (data != NULL) {
    *data = child->data;
  }

  child->data = NULL;
  trie->size--;

  // We only remove child if it has no children of its own. The node is freed
  // only once it has actually been unlinked, so the parent can never be left
  // holding a pointer to freed memory.
  if (lsm_bt_size(&child->bt) == 0 &&
      lsm_bt_remove(NULL, &parent->bt, c) == lsm_error_ok) {
    lsm_trie_node_free(child);
  }

  return lsm_error_ok;
}
|
||||||
|
|
|
@ -54,6 +54,8 @@ void test_insert_multiple() {
|
||||||
TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_ok);
|
TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_ok);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CHECK(lsm_bt_size(bt) == char_count);
|
||||||
|
|
||||||
void *data;
|
void *data;
|
||||||
for (size_t i = 0; i < char_count; i++) {
|
for (size_t i = 0; i < char_count; i++) {
|
||||||
TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_already_present);
|
TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_already_present);
|
||||||
|
@ -68,11 +70,13 @@ void test_remove_root() {
|
||||||
BT_INIT();
|
BT_INIT();
|
||||||
|
|
||||||
TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_ok);
|
TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_ok);
|
||||||
|
TEST_CHECK(lsm_bt_size(bt) == 1);
|
||||||
|
|
||||||
void *data;
|
void *data;
|
||||||
TEST_CHECK(lsm_bt_remove(&data, bt, 'a') == lsm_error_ok);
|
TEST_CHECK(lsm_bt_remove(&data, bt, 'a') == lsm_error_ok);
|
||||||
TEST_CHECK(data == (void *)1);
|
TEST_CHECK(data == (void *)1);
|
||||||
TEST_CHECK(bt->root == NULL);
|
TEST_CHECK(bt->root == NULL);
|
||||||
|
TEST_CHECK(lsm_bt_size(bt) == 0);
|
||||||
|
|
||||||
lsm_bt_free(bt);
|
lsm_bt_free(bt);
|
||||||
}
|
}
|
||||||
|
@ -95,6 +99,8 @@ void test_remove_multiple() {
|
||||||
TEST_CHECK(data == (void *)6);
|
TEST_CHECK(data == (void *)6);
|
||||||
TEST_CHECK(lsm_bt_remove(&data, bt, 'e') == lsm_error_not_found);
|
TEST_CHECK(lsm_bt_remove(&data, bt, 'e') == lsm_error_not_found);
|
||||||
|
|
||||||
|
TEST_CHECK(lsm_bt_size(bt) == char_count - 2);
|
||||||
|
|
||||||
lsm_bt_free(bt);
|
lsm_bt_free(bt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -122,8 +122,7 @@ int fuzzy_test_trie_seed(FuzzyConfig conf) {
|
||||||
lsm_trie *trie;
|
lsm_trie *trie;
|
||||||
lsm_trie_init(&trie);
|
lsm_trie_init(&trie);
|
||||||
|
|
||||||
bool changed;
|
lsm_error res;
|
||||||
lsm_error status;
|
|
||||||
|
|
||||||
// 0: success
|
// 0: success
|
||||||
// 1: invalid add
|
// 1: invalid add
|
||||||
|
@ -134,13 +133,13 @@ int fuzzy_test_trie_seed(FuzzyConfig conf) {
|
||||||
|
|
||||||
// Add all strings to trie, checking for duplicates
|
// Add all strings to trie, checking for duplicates
|
||||||
for (int i = 0; i < conf.word_count; i++) {
|
for (int i = 0; i < conf.word_count; i++) {
|
||||||
status = lsm_trie_insert(trie, &matrix[i], (void **)1);
|
res = lsm_trie_insert(trie, &matrix[i], (void **)1);
|
||||||
|
|
||||||
// if changed is false, *contains_dedupped[i] should be true, as changed
|
// if changed is false, *contains_dedupped[i] should be true, as changed
|
||||||
// can only be false if the string is already contained in the trie. if
|
// can only be false if the string is already contained in the trie. if
|
||||||
// changed is true, *contains_dedupped[i] should be false, as the string
|
// changed is true, *contains_dedupped[i] should be false, as the string
|
||||||
// cannot be in the trie yet.
|
// cannot be in the trie yet.
|
||||||
if (status == lsm_error_ok && *contains_dedupped[i]) {
|
if (res == lsm_error_ok && *contains_dedupped[i]) {
|
||||||
exit_code = 1;
|
exit_code = 1;
|
||||||
goto END;
|
goto END;
|
||||||
}
|
}
|
||||||
|
@ -159,26 +158,27 @@ int fuzzy_test_trie_seed(FuzzyConfig conf) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove all strings again, again taking duplicates into consideration
|
// Remove all strings again, again taking duplicates into consideration
|
||||||
/* for (int i = 0; i < conf.word_count; i++) { */
|
for (int i = 0; i < conf.word_count; i++) {
|
||||||
/* changed = remove_func(ct, matrix[i]); */
|
res = lsm_trie_remove(NULL, trie, &matrix[i]);
|
||||||
|
|
||||||
/* // The string shouldn't be in the trie, yet another add operation */
|
// The string shouldn't be in the trie, yet the remove operation
|
||||||
/* // says it added it as well */
|
// says it removed it anyway
|
||||||
/* if (changed != *contains_dedupped[i]) { */
|
if (res == lsm_error_ok && !*contains_dedupped[i]) {
|
||||||
/* exit_code = 2; */
|
exit_code = 2;
|
||||||
/* goto END; */
|
goto END;
|
||||||
/* } */
|
}
|
||||||
|
|
||||||
/* if (*contains_dedupped[i]) { */
|
if (*contains_dedupped[i]) {
|
||||||
/* *contains_dedupped[i] = false; */
|
*contains_dedupped[i] = false;
|
||||||
/* size--; */
|
size--;
|
||||||
/* } */
|
}
|
||||||
/* } */
|
}
|
||||||
|
|
||||||
// Finally, check that the trie is completely empty
|
// Finally, check that the trie is completely empty
|
||||||
/* if (size_func(ct) != 0) { */
|
if (lsm_trie_size(trie) != 0) {
|
||||||
/* exit_code = 4; */
|
printf("%lu %lu\n", lsm_trie_size(trie), size);
|
||||||
/* } */
|
exit_code = 4;
|
||||||
|
}
|
||||||
|
|
||||||
END:
|
END:
|
||||||
/* trie_free(ct); */
|
/* trie_free(ct); */
|
||||||
|
|
Loading…
Reference in New Issue