From 87000e8f73c7633a3edc26124f796774771cc06f Mon Sep 17 00:00:00 2001
From: Chewing_Bever
Date: Fri, 13 Oct 2023 22:08:06 +0200
Subject: [PATCH] feat(lsm): added trie search

---
Review notes (ignored by git am, not part of the commit message):
- lsm_str_init_copy: free the lsm_str if allocating the buffer fails
- trie test: check the result of lsm_str_init_copy and initialise the
  variables that are compared after a call that might fail
- the commit id above and the blob ids on the index lines are those of the
  original commit and were not recomputed for the edited files

 lsm/include/lsm/str.h   |  9 +++++++
 lsm/src/str/lsm_str.c   | 31 ++++++++++++++++++++++
 lsm/src/trie/lsm_trie.c | 59 +++++++++++++++++++++++++++++++++++++++++
 lsm/test/bt/bt.c        | 12 ++++-----
 lsm/test/trie/trie.c    | 36 +++++++++++++++++++++++++
 5 files changed, 141 insertions(+), 6 deletions(-)
 create mode 100644 lsm/test/trie/trie.c

diff --git a/lsm/include/lsm/str.h b/lsm/include/lsm/str.h
index b790a32..346fd54 100644
--- a/lsm/include/lsm/str.h
+++ b/lsm/include/lsm/str.h
@@ -34,6 +34,15 @@ void lsm_str_init_prealloc(lsm_str *str, char *s);
  */
 lsm_error lsm_str_init(lsm_str **ptr, char *s);
 
+/**
+ * Same as lsm_str_init, except it copies the original string instead of taking
+ * over ownership, leaving the original string untouched.
+ *
+ * @param ptr pointer to store newly allocated pointer
+ * @param s string to copy into lsm string
+ */
+lsm_error lsm_str_init_copy(lsm_str **ptr, char *s);
+
 /**
  * Deallocate the existing internal string if needed and replace the lsm_str
  * with a string of length 0, wiping its contents.
diff --git a/lsm/src/str/lsm_str.c b/lsm/src/str/lsm_str.c
index 0e4e75b..a33c700 100644
--- a/lsm/src/str/lsm_str.c
+++ b/lsm/src/str/lsm_str.c
@@ -44,6 +44,37 @@ lsm_error lsm_str_init(lsm_str **ptr, char *s) {
   return lsm_error_ok;
 }
 
+lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) {
+  lsm_str *str = calloc(1, sizeof(lsm_str));
+
+  if (str == NULL) {
+    return lsm_error_failed_alloc;
+  }
+
+  str->len = strlen(s);
+
+  // Strings of up to 8 bytes are stored inline, longer ones on the heap
+  if (str->len <= 8) {
+    memcpy(str->data.val, s, str->len);
+  } else {
+    char *buf = malloc(str->len * sizeof(char));
+
+    if (buf == NULL) {
+      // Release the struct allocated above so it isn't leaked
+      free(str);
+
+      return lsm_error_failed_alloc;
+    }
+
+    memcpy(buf, s, str->len);
+    str->data.ptr = buf;
+  }
+
+  *ptr = str;
+
+  return lsm_error_ok;
+}
+
 void lsm_str_zero(lsm_str *str) {
   if (str->len > 8) {
     free(str->data.ptr);
diff --git a/lsm/src/trie/lsm_trie.c b/lsm/src/trie/lsm_trie.c
index c7708df..541f89f 100644
--- a/lsm/src/trie/lsm_trie.c
+++ b/lsm/src/trie/lsm_trie.c
@@ -118,6 +118,8 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) {
     index += cmp;
   }
 
+  // This catches the edge case where the exact node for the string is already
+  // present in the trie
   if (node->data != NULL) {
     return lsm_error_already_present;
   }
@@ -126,3 +128,60 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) {
 
   return lsm_error_ok;
 }
+
+/**
+ * Look up the data stored in the trie under the given key.
+ *
+ * @param data out parameter for the stored data; only written on success
+ * @param trie trie to search in
+ * @param key key to look for
+ * @return lsm_error_ok if data was found, lsm_error_not_found if no node
+ * with data matches the key, or the error reported by lsm_bt_search
+ */
+lsm_error lsm_trie_search(void **data, lsm_trie *trie, lsm_str *key) {
+  uint64_t key_len = lsm_str_len(key);
+
+  if (key_len == 0) {
+    if (trie->root->data != NULL) {
+      *data = trie->root->data;
+
+      return lsm_error_ok;
+    } else {
+      return lsm_error_not_found;
+    }
+  }
+
+  uint64_t index = 0;
+  lsm_trie_node *node = trie->root;
+  lsm_trie_node *next_node;
+  lsm_error res;
+
+  while (index < key_len) {
+    char c = lsm_str_char(key, index);
+    res = lsm_bt_search((void **)&next_node, &node->bt, c);
+
+    if (res != lsm_error_ok) {
+      return res;
+    }
+
+    index++;
+
+    uint64_t cmp = lsm_str_cmp(key, index, &next_node->skip, 0);
+
+    // If we end in the middle of an edge, we definitely haven't found the node
+    if (cmp != lsm_str_len(&next_node->skip)) {
+      return lsm_error_not_found;
+    }
+
+    node = next_node;
+    index += cmp;
+  }
+
+  if (node->data == NULL) {
+    return lsm_error_not_found;
+  }
+
+  *data = node->data;
+
+  return lsm_error_ok;
+}
diff --git a/lsm/test/bt/bt.c b/lsm/test/bt/bt.c
index 1900305..f96cf99 100644
--- a/lsm/test/bt/bt.c
+++ b/lsm/test/bt/bt.c
@@ -99,11 +99,11 @@ void test_remove_multiple() {
 }
 
 TEST_LIST = {
-  { "test init", test_init },
-  { "test insert first", test_insert_first },
-  { "test insert two", test_insert_two },
-  { "test insert multiple", test_insert_multiple },
-  { "test remove root", test_remove_root },
-  { "test remove multiple", test_remove_multiple },
+  { "bt init", test_init },
+  { "bt insert first", test_insert_first },
+  { "bt insert two", test_insert_two },
+  { "bt insert multiple", test_insert_multiple },
+  { "bt remove root", test_remove_root },
+  { "bt remove multiple", test_remove_multiple },
   { NULL, NULL }
 };
diff --git a/lsm/test/trie/trie.c b/lsm/test/trie/trie.c
new file mode 100644
index 0000000..f3bf73b
--- /dev/null
+++ b/lsm/test/trie/trie.c
@@ -0,0 +1,36 @@
+#include "lsm.h"
+#include "test.h"
+#include "lsm/trie_internal.h"
+
+// Declares `trie` in the calling scope and checks that it initialises cleanly
+#define TRIE_INIT() \
+  lsm_trie *trie = NULL; \
+  TEST_CHECK(lsm_trie_init(&trie) == lsm_error_ok); \
+  TEST_CHECK(trie != NULL)
+
+void test_init() {
+  TRIE_INIT();
+  /* lsm_trie_free(trie); */
+}
+
+// A key can be inserted once, a second insert is rejected, and a search
+// returns the data stored by the first insert
+void test_insert_one() {
+  TRIE_INIT();
+
+  lsm_str *s;
+  TEST_CHECK(lsm_str_init_copy(&s, "hello") == lsm_error_ok);
+  TEST_CHECK(lsm_trie_insert(trie, s, (void *)1) == lsm_error_ok);
+  TEST_CHECK(lsm_trie_insert(trie, s, (void *)1) == lsm_error_already_present);
+
+  // Start from NULL so a failed search can't leave `data` uninitialised
+  void *data = NULL;
+  TEST_CHECK(lsm_trie_search(&data, trie, s) == lsm_error_ok);
+  TEST_CHECK(data == (void *)1);
+}
+
+TEST_LIST = {
+  { "trie init", test_init },
+  { "trie insert one", test_insert_one },
+  { NULL, NULL }
+};