feat(lsm): iterator over items in trie
parent
5564e23ceb
commit
f4d711365d
|
@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
* `-c` flag to use custom config file (useful for testing)
|
* `-c` flag to use custom config file (useful for testing)
|
||||||
* LSM
|
* LSM
|
||||||
* Binary tree iterators
|
* Binary tree iterators
|
||||||
|
* Trie iterators
|
||||||
|
|
||||||
## Removed
|
## Removed
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,7 @@ typedef enum lsm_error {
|
||||||
lsm_error_null_value = 4,
|
lsm_error_null_value = 4,
|
||||||
lsm_error_failed_io = 5,
|
lsm_error_failed_io = 5,
|
||||||
lsm_error_lock_busy = 6,
|
lsm_error_lock_busy = 6,
|
||||||
|
lsm_error_done = 7,
|
||||||
} lsm_error;
|
} lsm_error;
|
||||||
|
|
||||||
/*typedef struct lsm_string { */
|
/*typedef struct lsm_string { */
|
||||||
|
|
|
@ -102,6 +102,6 @@ void lsm_bt_iter(lsm_bt_iterator *out, const lsm_bt *bt);
|
||||||
* @return true if a new entry was returned, false if the iterator has no more
|
* @return true if a new entry was returned, false if the iterator has no more
|
||||||
* entries to return
|
* entries to return
|
||||||
*/
|
*/
|
||||||
bool lsm_bt_iter_next(const void **out, char *key_out, lsm_bt_iterator *iter);
|
bool lsm_bt_iter_next(void **out, char *key_out, lsm_bt_iterator *iter);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,7 +1,10 @@
|
||||||
#ifndef LSM_TRIE
|
#ifndef LSM_TRIE
|
||||||
#define LSM_TRIE
|
#define LSM_TRIE
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "lsm.h"
|
#include "lsm.h"
|
||||||
|
#include "lsm/bt.h"
|
||||||
#include "lsm/str.h"
|
#include "lsm/str.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -9,6 +12,11 @@
|
||||||
*/
|
*/
|
||||||
typedef struct lsm_trie lsm_trie;
|
typedef struct lsm_trie lsm_trie;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A node inside an `lsm_trie` trie
|
||||||
|
*/
|
||||||
|
typedef struct lsm_trie_node lsm_trie_node;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize a new trie.
|
* Initialize a new trie.
|
||||||
*
|
*
|
||||||
|
@ -57,4 +65,36 @@ lsm_error lsm_trie_remove(void **out, lsm_trie *trie, const lsm_str *key);
|
||||||
*/
|
*/
|
||||||
uint64_t lsm_trie_size(const lsm_trie *trie);
|
uint64_t lsm_trie_size(const lsm_trie *trie);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents an in-flight iterator over a trie
|
||||||
|
*/
|
||||||
|
typedef struct lsm_trie_iterator lsm_trie_iterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize an iterator to iterate over all entries with keys starting
|
||||||
|
* with the given prefix.
|
||||||
|
*
|
||||||
|
* @param out pointer to store iterator pointer in
|
||||||
|
* @param trie trie to iterate
|
||||||
|
* @param prefix prefix of the keys; a zero-length string means iterating over
|
||||||
|
* the entire trie; NULL is interpreted as a zero-length string
|
||||||
|
*/
|
||||||
|
lsm_error lsm_trie_iter(lsm_trie_iterator **out, const lsm_trie *trie,
|
||||||
|
const lsm_str *prefix);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Advance the given iterator.
|
||||||
|
*
|
||||||
|
* @param out pointer to store next data pointer in
|
||||||
|
* @param iter iterator to advance
|
||||||
|
* @return `lsm_error_ok` if next element has been returned, `lsm_error_done` if
|
||||||
|
* no more elements are present, or some other error code
|
||||||
|
*/
|
||||||
|
lsm_error lsm_trie_iter_next(void **out, lsm_trie_iterator *iter);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free the given iterator.
|
||||||
|
*/
|
||||||
|
void lsm_trie_iter_free(lsm_trie_iterator *iter);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -5,14 +5,11 @@
|
||||||
#include "lsm/str_internal.h"
|
#include "lsm/str_internal.h"
|
||||||
#include "lsm/trie.h"
|
#include "lsm/trie.h"
|
||||||
|
|
||||||
/**
|
struct lsm_trie_node {
|
||||||
* A node inside a trie structure
|
|
||||||
*/
|
|
||||||
typedef struct lsm_trie_node {
|
|
||||||
lsm_bt bt;
|
lsm_bt bt;
|
||||||
lsm_str skip;
|
lsm_str skip;
|
||||||
void *data;
|
void *data;
|
||||||
} lsm_trie_node;
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allocate and initialize a new trie node
|
* Allocate and initialize a new trie node
|
||||||
|
@ -33,4 +30,13 @@ struct lsm_trie {
|
||||||
uint64_t size;
|
uint64_t size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// State of an in-flight iteration over (a subtree of) a trie.
struct lsm_trie_iterator {
|
||||||
|
// Node whose data the next call to lsm_trie_iter_next hands out; NULL once the
// iteration has no more entries.
lsm_trie_node *next;
|
||||||
|
// Growable stack of binary-tree iterators, one per trie node whose children
// are currently being walked.
struct {
|
||||||
|
// Heap-allocated backing array; released by lsm_trie_iter_free.
lsm_bt_iterator *arr;
|
||||||
|
// Number of iterators currently on the stack.
size_t len;
|
||||||
|
// Number of iterators `arr` has room for.
size_t cap;
|
||||||
|
} stack;
|
||||||
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -184,7 +184,7 @@ void lsm_bt_iter(lsm_bt_iterator *out, const lsm_bt *bt) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool lsm_bt_iter_next(const void **out, char *key_out, lsm_bt_iterator *iter) {
|
bool lsm_bt_iter_next(void **out, char *key_out, lsm_bt_iterator *iter) {
|
||||||
if (iter->next == NULL) {
|
if (iter->next == NULL) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,117 @@
|
||||||
|
#include "lsm.h"
|
||||||
|
#include "lsm/trie_internal.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Advance the iterator so that `next` now points to a node containing data. If
|
||||||
|
* no more data nodes are found, `next` is set to NULL. The initial value of
|
||||||
|
* `next` is ignored.
|
||||||
|
*/
|
||||||
|
lsm_error lsm_trie_iter_next_data_node(lsm_trie_iterator *iter) {
|
||||||
|
do {
|
||||||
|
// Traverse back up the stack until we reach either the end of the subtree,
|
||||||
|
// or an iterator that hasn't been exhausted yet
|
||||||
|
while ((iter->stack.len > 0) &&
|
||||||
|
!lsm_bt_iter_next((void **)&iter->next, NULL,
|
||||||
|
&iter->stack.arr[iter->stack.len - 1])) {
|
||||||
|
iter->stack.len--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (iter->stack.len == 0) {
|
||||||
|
iter->next = NULL;
|
||||||
|
|
||||||
|
return lsm_error_ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add node's subtree to the stack
|
||||||
|
if (iter->stack.len == iter->stack.cap) {
|
||||||
|
lsm_bt_iterator *arr = realloc(
|
||||||
|
iter->stack.arr, iter->stack.cap * 2 * sizeof(lsm_bt_iterator));
|
||||||
|
|
||||||
|
if (arr == NULL) {
|
||||||
|
return lsm_error_failed_alloc;
|
||||||
|
}
|
||||||
|
|
||||||
|
iter->stack.arr = arr;
|
||||||
|
iter->stack.cap *= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
lsm_bt_iter(&iter->stack.arr[iter->stack.len], &iter->next->bt);
|
||||||
|
iter->stack.len++;
|
||||||
|
|
||||||
|
} while (iter->next->data == NULL);
|
||||||
|
|
||||||
|
return lsm_error_ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
lsm_error lsm_trie_iter(lsm_trie_iterator **out, const lsm_trie *trie,
|
||||||
|
const lsm_str *prefix) {
|
||||||
|
uint64_t prefix_len = prefix == NULL ? 0 : lsm_str_len(prefix);
|
||||||
|
|
||||||
|
uint64_t index = 0;
|
||||||
|
lsm_trie_node *node = trie->root;
|
||||||
|
|
||||||
|
// Simplified traversal down the trie to find the root of the subtree
|
||||||
|
// contaiing all nodes with the given prefix
|
||||||
|
while ((index < prefix_len) && (node != NULL)) {
|
||||||
|
char c = lsm_str_char(prefix, index);
|
||||||
|
lsm_error res = lsm_bt_search((void **)&node, &node->bt, c);
|
||||||
|
|
||||||
|
switch (res) {
|
||||||
|
case lsm_error_ok:
|
||||||
|
index += 1 + lsm_str_len(&node->skip);
|
||||||
|
break;
|
||||||
|
case lsm_error_not_found:
|
||||||
|
node = NULL;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return res;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
lsm_trie_iterator *iter = calloc(1, sizeof(lsm_trie_iterator));
|
||||||
|
|
||||||
|
if (iter == NULL) {
|
||||||
|
return lsm_error_failed_alloc;
|
||||||
|
}
|
||||||
|
|
||||||
|
iter->next = node;
|
||||||
|
|
||||||
|
if (node != NULL) {
|
||||||
|
lsm_bt_iterator *arr = malloc(sizeof(lsm_bt_iterator));
|
||||||
|
|
||||||
|
if (arr == NULL) {
|
||||||
|
free(iter);
|
||||||
|
|
||||||
|
return lsm_error_failed_alloc;
|
||||||
|
}
|
||||||
|
|
||||||
|
iter->stack.arr = arr;
|
||||||
|
iter->stack.cap = 1;
|
||||||
|
iter->stack.len = 1;
|
||||||
|
|
||||||
|
lsm_bt_iter(&iter->stack.arr[0], &node->bt);
|
||||||
|
|
||||||
|
if (node->data == NULL) {
|
||||||
|
LSM_RES(lsm_trie_iter_next_data_node(iter));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*out = iter;
|
||||||
|
|
||||||
|
return lsm_error_ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Hand out the data of the node the iterator currently points at and move the
 * iterator on to the following data node.
 *
 * @param out receives the data pointer of the current entry; not written when
 * the iterator is exhausted
 * @param iter iterator to advance
 * @return `lsm_error_done` if no entry was left, otherwise the result of
 * advancing the iterator to the following data node
 */
lsm_error lsm_trie_iter_next(void **out, lsm_trie_iterator *iter) {
  lsm_trie_node *current = iter->next;

  if (current != NULL) {
    *out = current->data;

    return lsm_trie_iter_next_data_node(iter);
  }

  return lsm_error_done;
}
|
||||||
|
|
||||||
|
/**
 * Release an iterator together with its internal stack.
 *
 * @param iter iterator to free; passing NULL is a no-op, mirroring `free`
 */
void lsm_trie_iter_free(lsm_trie_iterator *iter) {
  if (iter == NULL) {
    return;
  }

  free(iter->stack.arr);
  free(iter);
}
|
|
@ -120,7 +120,7 @@ void test_iter() {
|
||||||
lsm_bt_iter(&iter, bt);
|
lsm_bt_iter(&iter, bt);
|
||||||
|
|
||||||
char key;
|
char key;
|
||||||
const void *data;
|
void *data;
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
|
|
||||||
while (lsm_bt_iter_next(&data, &key, &iter)) {
|
while (lsm_bt_iter_next(&data, &key, &iter)) {
|
||||||
|
|
|
@ -25,8 +25,64 @@ void test_insert_one() {
|
||||||
TEST_CHECK(data == (void *)1);
|
TEST_CHECK(data == (void *)1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void test_iter() {
|
||||||
|
TRIE_INIT();
|
||||||
|
|
||||||
|
lsm_str *s;
|
||||||
|
|
||||||
|
lsm_str_init_copy(&s, "hello");
|
||||||
|
TEST_CHECK(lsm_trie_insert(trie, s, (void *)1) == lsm_error_ok);
|
||||||
|
|
||||||
|
lsm_str_init_copy(&s, "hella");
|
||||||
|
TEST_CHECK(lsm_trie_insert(trie, s, (void *)2) == lsm_error_ok);
|
||||||
|
|
||||||
|
lsm_str_init_copy(&s, "hel");
|
||||||
|
TEST_CHECK(lsm_trie_insert(trie, s, (void *)3) == lsm_error_ok);
|
||||||
|
|
||||||
|
lsm_str_init_copy(&s, "wow");
|
||||||
|
TEST_CHECK(lsm_trie_insert(trie, s, (void *)4) == lsm_error_ok);
|
||||||
|
|
||||||
|
lsm_str_init_copy(&s, "hel");
|
||||||
|
|
||||||
|
lsm_trie_iterator *iter;
|
||||||
|
TEST_CHECK(lsm_trie_iter(&iter, trie, s) == lsm_error_ok);
|
||||||
|
|
||||||
|
void *data;
|
||||||
|
|
||||||
|
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok);
|
||||||
|
TEST_CHECK_(data == (void *)3, "%p == %p", data, (void *)3);
|
||||||
|
|
||||||
|
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok);
|
||||||
|
TEST_CHECK_(data == (void *)2, "%p == %p", data, (void *)2);
|
||||||
|
|
||||||
|
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok);
|
||||||
|
TEST_CHECK_(data == (void *)1, "%p == %p", data, (void *)1);
|
||||||
|
|
||||||
|
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_done);
|
||||||
|
lsm_trie_iter_free(iter);
|
||||||
|
|
||||||
|
// Test full trie iterator
|
||||||
|
TEST_CHECK(lsm_trie_iter(&iter, trie, NULL) == lsm_error_ok);
|
||||||
|
|
||||||
|
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok);
|
||||||
|
TEST_CHECK_(data == (void *)3, "%p == %p", data, (void *)3);
|
||||||
|
|
||||||
|
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok);
|
||||||
|
TEST_CHECK_(data == (void *)2, "%p == %p", data, (void *)2);
|
||||||
|
|
||||||
|
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok);
|
||||||
|
TEST_CHECK_(data == (void *)1, "%p == %p", data, (void *)1);
|
||||||
|
|
||||||
|
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_ok);
|
||||||
|
TEST_CHECK_(data == (void *)4, "%p == %p", data, (void *)4);
|
||||||
|
|
||||||
|
TEST_CHECK(lsm_trie_iter_next(&data, iter) == lsm_error_done);
|
||||||
|
lsm_trie_iter_free(iter);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_LIST = {
|
TEST_LIST = {
|
||||||
{ "trie init", test_init },
|
{ "trie init", test_init },
|
||||||
{ "trie insert one", test_insert_one },
|
{ "trie insert one", test_insert_one },
|
||||||
|
{ "trie iter", test_iter },
|
||||||
{ NULL, NULL }
|
{ NULL, NULL }
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue