feat(lsm): added trie search

lsm
Jef Roosens 2023-10-13 22:08:06 +02:00
parent 622d644f25
commit 87000e8f73
Signed by: Jef Roosens
GPG Key ID: B75D4F293C7052DB
5 changed files with 124 additions and 6 deletions

View File

@ -34,6 +34,15 @@ void lsm_str_init_prealloc(lsm_str *str, char *s);
*/
lsm_error lsm_str_init(lsm_str **ptr, char *s);
/**
* Same as lsm_str_init, except it copies the original string instead of taking
* over ownership, leaving the original string untouched.
*
* The length of the copy is determined with strlen, so s must be a valid
* NUL-terminated string. *ptr is only written when the call succeeds.
*
* @param ptr pointer to store newly allocated pointer
* @param s string to copy into lsm string
* @return lsm_error_ok on success, lsm_error_failed_alloc if an allocation
* failed
*/
lsm_error lsm_str_init_copy(lsm_str **ptr, char *s);
/**
* Deallocate the existing internal string if needed and replace the lsm_str
* with a string of length 0, wiping its contents.

View File

@ -44,6 +44,33 @@ lsm_error lsm_str_init(lsm_str **ptr, char *s) {
return lsm_error_ok;
}
/**
 * Allocate a new lsm_str that holds its own copy of the NUL-terminated string
 * `s`; the caller keeps ownership of `s`.
 *
 * Strings of at most 8 bytes are copied into the inline `data.val` storage;
 * longer strings are copied into a separately allocated buffer referenced by
 * `data.ptr`. Only `len` bytes are copied, so the terminating NUL byte is not
 * stored.
 *
 * @param ptr where to store the newly allocated lsm_str; only written on
 * success
 * @param s NUL-terminated string to copy
 * @return lsm_error_ok on success, lsm_error_failed_alloc if either allocation
 * fails (nothing is leaked in that case)
 */
lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) {
  lsm_str *str = calloc(1, sizeof *str);

  if (str == NULL) {
    return lsm_error_failed_alloc;
  }

  str->len = strlen(s);

  if (str->len <= 8) {
    memcpy(str->data.val, s, str->len);
  } else {
    char *buf = malloc(str->len * sizeof(char));

    if (buf == NULL) {
      // The struct itself was already allocated; release it so a failed
      // buffer allocation does not leak it.
      free(str);

      return lsm_error_failed_alloc;
    }

    memcpy(buf, s, str->len);
    str->data.ptr = buf;
  }

  *ptr = str;

  return lsm_error_ok;
}
void lsm_str_zero(lsm_str *str) {
if (str->len > 8) {
free(str->data.ptr);

View File

@ -118,6 +118,8 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) {
index += cmp;
}
// This catches the edge case where the exact node for the string is already
// present in the trie
if (node->data != NULL) {
return lsm_error_already_present;
}
@ -126,3 +128,51 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) {
return lsm_error_ok;
}
/**
 * Look up the data pointer stored under `key` in `trie`.
 *
 * Starting at the root, each step selects a child through the current node's
 * binary tree using the next character of the key, then requires the child's
 * `skip` string to match the characters that follow in full.
 *
 * @param data where to store the found data pointer; only written on success
 * @param trie trie to search in
 * @param key key to look for
 * @return lsm_error_ok if a node with non-NULL data was found for the key,
 * lsm_error_not_found if the key ends in the middle of an edge or the final
 * node carries no data, or the error reported by lsm_bt_search when no
 * matching child exists
 */
lsm_error lsm_trie_search(void **data, lsm_trie *trie, lsm_str *key) {
  const uint64_t key_len = lsm_str_len(key);
  lsm_trie_node *cur = trie->root;
  uint64_t pos = 0;

  // An empty key never enters the loop, so it is answered directly by the
  // root node's data below.
  while (pos < key_len) {
    const char ch = lsm_str_char(key, pos);
    lsm_trie_node *child;
    lsm_error res = lsm_bt_search((void **)&child, &cur->bt, ch);

    if (res != lsm_error_ok) {
      return res;
    }

    // The character used for the child lookup is consumed; what follows in the
    // key is compared against the child's skip string from its start.
    pos++;

    uint64_t matched = lsm_str_cmp(key, pos, &child->skip, 0);

    // Anything less than the full skip string means the key stops (or
    // diverges) partway along this edge, so no node exists for it.
    if (matched != lsm_str_len(&child->skip)) {
      return lsm_error_not_found;
    }

    pos += matched;
    cur = child;
  }

  // A node can exist purely as a branching point without holding a value
  if (cur->data == NULL) {
    return lsm_error_not_found;
  }

  *data = cur->data;

  return lsm_error_ok;
}

View File

@ -99,11 +99,11 @@ void test_remove_multiple() {
}
// Table pairing each test's display name with the function that implements
// it; the { NULL, NULL } entry closes the table.
// NOTE(review): this view lists the same six functions under both the
// "test ..." and the "bt ..." names, which looks like the old and new lines of
// a rename shown together -- confirm only one set is present in the real file.
TEST_LIST = {
{ "test init", test_init },
{ "test insert first", test_insert_first },
{ "test insert two", test_insert_two },
{ "test insert multiple", test_insert_multiple },
{ "test remove root", test_remove_root },
{ "test remove multiple", test_remove_multiple },
{ "bt init", test_init },
{ "bt insert first", test_insert_first },
{ "bt insert two", test_insert_two },
{ "bt insert multiple", test_insert_multiple },
{ "bt remove root", test_remove_root },
{ "bt remove multiple", test_remove_multiple },
{ NULL, NULL }
};

View File

@ -0,0 +1,32 @@
#include "lsm.h"
#include "test.h"
#include "lsm/trie_internal.h"
// Declares a local `lsm_trie *trie` in the calling scope and checks that
// lsm_trie_init succeeds and yields a non-NULL trie. This cannot be wrapped in
// do { } while (0) because `trie` has to stay visible to the rest of the test.
// The pointer starts out as NULL so that the second check reads a defined
// value even if lsm_trie_init fails before storing anything.
#define TRIE_INIT() \
  lsm_trie *trie = NULL; \
  TEST_CHECK(lsm_trie_init(&trie) == lsm_error_ok); \
  TEST_CHECK(trie != NULL)
// Checks that a trie can be initialised (see TRIE_INIT for the assertions).
void test_init(void) {
  TRIE_INIT();

  // NOTE(review): the trie is never released here; the original carried a
  // commented-out lsm_trie_free(trie) call at this point -- restore it once
  // that function can be used.
}
void test_insert_one() {
TRIE_INIT();
lsm_str *s;
lsm_str_init_copy(&s, "hello");
TEST_CHECK(lsm_trie_insert(trie, s, (void *)1) == lsm_error_ok);
TEST_CHECK(lsm_trie_insert(trie, s, (void *)1) == lsm_error_already_present);
void *data;
TEST_CHECK(lsm_trie_search(&data, trie, s) == lsm_error_ok);
TEST_CHECK(data == (void *)1);
}
// Table pairing each trie test's display name with the function that
// implements it. The { NULL, NULL } entry is presumably the end-of-list marker
// expected by the TEST_LIST macro from test.h -- confirm against that header.
TEST_LIST = {
{ "trie init", test_init },
{ "trie insert one", test_insert_one },
{ NULL, NULL }
};