Compare commits

...

4 Commits

16 changed files with 2444 additions and 64 deletions

View File

@ -3,7 +3,7 @@ LIB_FILENAME = liblsm.a
BUILD_DIR = build
SRC_DIR = src
TEST_DIR = test
INC_DIRS = include
INC_DIRS = include src/_include
# -MMD: generate a .d file for every source file. This file can be imported by
# make and makes make aware that a header file has been changed, ensuring an

View File

@ -6,62 +6,58 @@
#define LSM_MAX_SKIP_SIZE 8
typedef enum lsm_error {
lsm_error_ok = 0,
lsm_error_failed_alloc = 1,
lsm_error_not_found = 2
lsm_error_ok = 0,
lsm_error_failed_alloc = 1,
lsm_error_not_found = 2,
lsm_error_already_present = 3
} lsm_error;
/**
* Represents a string (or really any kind of data) with a known length. Data
* with length 8 or less is stored inside the pointer, and does not allocate
* additional memory.
*/
typedef struct lsm_string {
uint64_t len;
union {
void *ptr;
char val[8];
} str;
} lsm_string;
/*typedef struct lsm_string { */
/* uint64_t len; */
/* union { */
/* void *ptr; */
/* char val[8]; */
/* } str; */
/*} lsm_string; */
/**
* The type of an attribute. Each type is represented as a single bit of a
* 32-bit integer, so they can be easily combined into a bitmap.
*/
typedef enum lsm_attr_type {
lsm_attr_type_entry_type = 1 << 0
} lsm_attr_type;
/*/1** */
/* * The type of an attribute. Each type is represented as a single bit of a */
/* * 32-bit integer, so they can be easily combined into a bitmap. */
/* *1/ */
/*typedef enum lsm_attr_type { lsm_attr_type_entry_type = 1 << 0 }
* lsm_attr_type; */
/**
* A single attribute associated with an entry
*/
typedef struct lsm_attr {
lsm_attr_type type;
lsm_string str;
} lsm_attr;
/*/1** */
/* * A single attribute associated with an entry */
/* *1/ */
/*typedef struct lsm_attr { */
/* lsm_attr_type type; */
/* lsm_string str; */
/*} lsm_attr; */
/**
* Represents a collection of attributes for an entry. A collection can only
* contain one of each attribute.
/*/1** */
/* * Represents a collection of attributes for an entry. A collection can only
*/
typedef struct lsm_attr_list {
uint64_t count;
lsm_attr *items;
uint32_t bitmap;
} lsm_attr_list;
/* * contain one of each attribute. */
/* *1/ */
/*typedef struct lsm_attr_list { */
/* uint64_t count; */
/* lsm_attr *items; */
/* uint32_t bitmap; */
/*} lsm_attr_list; */
/**
* An entry inside an LSM store
*/
typedef struct lsm_entry {
lsm_string key;
lsm_attr_list attrs;
lsm_string data;
} lsm_entry;
/*/1** */
/* * An entry inside an LSM store */
/* *1/ */
/*typedef struct lsm_entry { */
/* lsm_string key; */
/* lsm_attr_list attrs; */
/* lsm_string data; */
/*} lsm_entry; */
/**
* A store of entries, which manages its data both in-memory and on disk.
*/
typedef struct lsm_store lsm_store;
/*/1** */
/* * A store of entries, which manages its data both in-memory and on disk. */
/* *1/ */
/*typedef struct lsm_store lsm_store; */
#endif

View File

@ -0,0 +1,51 @@
#ifndef LSM_BT
#define LSM_BT
#include "lsm.h"
/**
 * A binary tree implementation using char values as keys. Each key is stored
 * at most once and is associated with a single data pointer.
 */
typedef struct lsm_bt lsm_bt;
/**
 * Initialize a new, empty binary tree.
 *
 * @param ptr where to store newly allocated pointer; only written on success
 * @return lsm_error_ok on success, lsm_error_failed_alloc if the allocation
 * failed
 */
lsm_error lsm_bt_init(lsm_bt **ptr);
/**
 * Deallocate an entire binary tree, including all its nodes. The data pointers
 * stored in the nodes are not freed.
 *
 * @param bt binary tree to free; it is dereferenced, so it must not be NULL
 */
void lsm_bt_free(lsm_bt *bt);
/**
 * Search for the data stored behind the given key.
 *
 * @param out pointer to store data pointer in; only written when the key is
 * found
 * @param bt binary tree to search
 * @param key key to search
 * @return lsm_error_ok if the key was found, lsm_error_not_found otherwise
 */
lsm_error lsm_bt_search(void **out, lsm_bt *bt, char key);
/**
 * Insert a new data value into the tree with the given key. An existing entry
 * with the same key is left unchanged.
 *
 * @param bt binary tree to insert into
 * @param key key to insert
 * @param data data to store
 * @return lsm_error_ok on success, lsm_error_already_present if the key is
 * already stored, lsm_error_failed_alloc if the new node could not be allocated
 */
lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data);
/**
 * Remove the given key from the binary tree. Ownership of the data pointer is
 * returned to the caller.
 *
 * @param out address to write data pointer to; only written when the key is
 * found
 * @param bt binary tree to remove from
 * @param key key to remove
 * @return lsm_error_ok if the key was removed, lsm_error_not_found if it was
 * not present
 */
lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key);
#endif

View File

@ -0,0 +1,60 @@
#ifndef LSM_STR
#define LSM_STR
#include "lsm.h"
/**
 * Represents a string (or really any kind of data) with a known length. Data
 * with length 8 or less is stored inside the pointer, and does not allocate
 * additional memory.
 */
typedef struct lsm_str lsm_str;
/**
 * Allocate a new string struct of length 0.
 *
 * @param ptr pointer to store newly allocated pointer in; only written on
 * success
 * @return lsm_error_ok on success, lsm_error_failed_alloc if the allocation
 * failed
 */
lsm_error lsm_str_init_zero(lsm_str **ptr);
/**
 * Update an existing lsm_str so it now represents the new provided string.
 *
 * NOTE(review): this comment used to state that the string pointer of the
 * original object is free'd if needed, but the implementation in lsm_str.c
 * does not release a buffer already held by str. Confirm whether callers are
 * expected to call lsm_str_zero() first.
 *
 * @param str lsm_str object to modify
 * @param s NUL-terminated string to convert into lsm string; ownership is
 * taken over: strings of length 8 or less are copied into the struct and s is
 * freed, longer strings are stored by pointer
 */
void lsm_str_init_prealloc(lsm_str *str, char *s);
/**
 * Allocate and initialize a new lsm_str object
 *
 * @param ptr pointer to store newly allocated pointer; only written on success
 * @param s string to convert into lsm string; ownership is taken over on
 * success. If the allocation fails, s is left untouched.
 * @return lsm_error_ok on success, lsm_error_failed_alloc if the allocation
 * failed
 */
lsm_error lsm_str_init(lsm_str **ptr, char *s);
/**
 * Deallocate the existing internal string if needed and replace the lsm_str
 * with a string of length 0.
 *
 * @param str string to wipe
 */
void lsm_str_zero(lsm_str *str);
/**
 * Deallocate the string and its internal char buffer if needed. Only call this
 * on heap-allocated strings.
 *
 * @param str string to deallocate
 */
void lsm_str_free(lsm_str *str);
/**
 * Return the length of the string.
 *
 * @param str string to return length for.
 * @return length of the string in bytes
 */
uint64_t lsm_str_len(lsm_str *str);
#endif

View File

@ -0,0 +1,60 @@
#ifndef LSM_TRIE
#define LSM_TRIE
#include "lsm.h"
#include "lsm/str.h"
/**
 * A struct representing a trie
 */
typedef struct lsm_trie lsm_trie;
/**
 * Initialize a new trie.
 *
 * @param ptr where to store the newly allocated pointer
 */
lsm_error lsm_trie_init(lsm_trie **ptr);
/**
 * Deallocate an entire trie, including all its nodes
 *
 * @param trie trie to free
 */
void lsm_trie_free(lsm_trie *trie);
/**
 * Insert a new element into the trie using the specified key.
 *
 * @param trie trie to insert into
 * @param key key to insert data with
 * @param data data to insert
 */
lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data);
/**
 * Search for an element in the trie.
 *
 * @param data where to store the data pointer, if present
 * @param trie trie to search in
 * @param key key to search with
 */
lsm_error lsm_trie_search(void **data, lsm_trie *trie, lsm_str *key);
/**
 * Remove an element from the trie.
 *
 * @param data where to store the removed data pointer, if present.
 * @param trie trie to remove from
 * @param key key to remove
 */
lsm_error lsm_trie_remove(void **data, lsm_trie *trie, lsm_str *key);
/**
 * Return the size of a trie
 *
 * @param trie trie to return size for
 */
uint64_t lsm_trie_size(lsm_trie *trie);
#endif

View File

@ -0,0 +1,38 @@
#ifndef LSM_BT_INTERNAL
#define LSM_BT_INTERNAL
#include <stdint.h>
#include "lsm.h"
#include "lsm/bt.h"
/**
 * Node inside a binary tree. Nodes are ordered by key: the tree code descends
 * into `left` for smaller keys and into `right` otherwise.
 */
typedef struct lsm_bt_node {
// Subtree holding the keys smaller than `key`, or NULL
struct lsm_bt_node *left;
// Subtree holding the keys larger than `key`, or NULL
struct lsm_bt_node *right;
// Caller-provided data pointer; never freed by the tree itself
void *data;
// Key this node is stored under
char key;
} lsm_bt_node;
/**
 * Initialize a new binary tree node without children
 *
 * @param ptr where to store newly allocated pointer; only written on success
 * @param key key for the node
 * @param data data to store
 * @return lsm_error_ok on success, lsm_error_failed_alloc if the allocation
 * failed
 */
lsm_error lsm_bt_node_init(lsm_bt_node **ptr, const char key, void *data);
/**
 * Deallocate a single binary tree node. Its children and its data pointer are
 * left untouched.
 *
 * @param node node to free
 */
void lsm_bt_node_free(lsm_bt_node *node);
/**
 * Binary tree handle: the link to the root node plus the number of keys
 * currently stored.
 */
struct lsm_bt {
  // Root of the tree, or NULL while the tree is empty
  lsm_bt_node *root;
  // Number of stored keys. A tree keyed by char can hold 256 distinct keys
  // (with 8-bit chars), one more than uint8_t can represent, so a wider
  // counter is used to keep a full tree from wrapping around to 0.
  uint16_t size;
};
#endif

View File

@ -0,0 +1,16 @@
#ifndef LSM_STR_INTERNAL
#define LSM_STR_INTERNAL
#include <stdint.h>
#include "lsm/str.h"
/**
 * Concrete layout behind the opaque lsm_str handle. Which member of `data` is
 * in use is decided by `len`.
 */
struct lsm_str {
// Number of bytes in the string
uint64_t len;
union {
// Heap buffer owned by the string; used when len is larger than 8
void *ptr;
// Inline storage; used when len is 8 or less. Only the first len bytes are
// written, so the content is not NUL-terminated when len is exactly 8.
char val[8];
} data;
};
#endif

View File

@ -0,0 +1,14 @@
#ifndef LSM_TRIE_INTERNAL
#define LSM_TRIE_INTERNAL
#include "lsm/bt_internal.h"
#include "lsm/str_internal.h"
#include "lsm/trie.h"
/**
 * Node of a trie.
 *
 * NOTE(review): no code using this struct is visible yet; the field roles
 * below are inferred from their names and types and should be confirmed.
 */
typedef struct lsm_trie_node {
// Embedded binary tree; presumably maps the next character to a child node
lsm_bt bt;
// Embedded string; presumably a run of characters skipped before this node
lsm_str skip;
// Presumably the character this node represents
char c;
} lsm_trie_node;
#endif

132
lsm/src/bt/lsm_bt.c 100644
View File

@ -0,0 +1,132 @@
#include <stdlib.h>
#include "lsm/bt_internal.h"
/**
 * Allocate a binary tree node holding the given key and data pointer.
 *
 * The node comes from calloc, so both child links start out zeroed.
 *
 * @param ptr where to store the new node; only written on success
 * @param key key for the node
 * @param data data pointer to store in the node
 * @return lsm_error_ok on success, lsm_error_failed_alloc if the allocation
 * failed
 */
lsm_error lsm_bt_node_init(lsm_bt_node **ptr, const char key, void *data) {
  lsm_bt_node *fresh = calloc(1, sizeof *fresh);

  if (!fresh) {
    return lsm_error_failed_alloc;
  }

  fresh->data = data;
  fresh->key = key;
  *ptr = fresh;

  return lsm_error_ok;
}
/**
 * Release a single node. Neither its children nor the data pointer it holds
 * are freed.
 *
 * @param node node to free
 */
void lsm_bt_node_free(lsm_bt_node *node) {
  free(node);
}
/**
 * Free a node together with every node reachable through its child links.
 * The data pointers stored in the nodes are not freed.
 *
 * Like free(), this accepts NULL and does nothing in that case, so callers do
 * not have to guard against empty subtrees.
 *
 * @param node root of the subtree to free; may be NULL
 */
void lsm_bt_node_free_recursive(lsm_bt_node *node) {
  if (node == NULL) {
    return;
  }

  // Children first: the links are unreachable once the node itself is freed
  lsm_bt_node_free_recursive(node->left);
  lsm_bt_node_free_recursive(node->right);

  lsm_bt_node_free(node);
}
/**
 * Allocate an empty binary tree (no root, size 0).
 *
 * @param ptr where to store the new tree; only written on success
 * @return lsm_error_ok on success, lsm_error_failed_alloc if the allocation
 * failed
 */
lsm_error lsm_bt_init(lsm_bt **ptr) {
  lsm_bt *tree = calloc(1, sizeof *tree);

  if (tree != NULL) {
    *ptr = tree;
    return lsm_error_ok;
  }

  return lsm_error_failed_alloc;
}
/**
 * Free every node of the tree and then the tree handle itself. The data
 * pointers stored in the nodes are not freed.
 *
 * @param bt tree to free; it is dereferenced, so it must not be NULL
 */
void lsm_bt_free(lsm_bt *bt) {
  lsm_bt_node *top = bt->root;

  if (top) {
    lsm_bt_node_free_recursive(top);
  }

  free(bt);
}
/**
 * Insert a key/data pair into the tree. An existing entry with the same key
 * is left unchanged.
 *
 * @param bt tree to insert into
 * @param key key to insert
 * @param data data pointer to associate with the key
 * @return lsm_error_ok on success, lsm_error_already_present if the key is
 * already stored, lsm_error_failed_alloc if the node could not be allocated
 */
lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data) {
  // `slot` always points at the link (root or child pointer) that would have
  // to reference the new node if the walk stopped here
  lsm_bt_node **slot = &bt->root;

  for (lsm_bt_node *cur = *slot; cur != NULL; cur = *slot) {
    if (cur->key == key) {
      return lsm_error_already_present;
    }

    slot = (key < cur->key) ? &cur->left : &cur->right;
  }

  lsm_bt_node *fresh;

  if (lsm_bt_node_init(&fresh, key, data) != lsm_error_ok) {
    return lsm_error_failed_alloc;
  }

  *slot = fresh;
  bt->size++;

  return lsm_error_ok;
}
/**
 * Look up the data pointer stored under the given key.
 *
 * @param out where to store the data pointer; only written when the key is
 * found
 * @param bt tree to search
 * @param key key to look for
 * @return lsm_error_ok if the key was found, lsm_error_not_found otherwise
 */
lsm_error lsm_bt_search(void **out, lsm_bt *bt, char key) {
  for (lsm_bt_node *cur = bt->root; cur != NULL;
       cur = (key < cur->key) ? cur->left : cur->right) {
    if (cur->key == key) {
      *out = cur->data;
      return lsm_error_ok;
    }
  }

  return lsm_error_not_found;
}
/**
 * Remove a key from the tree and hand its data pointer back to the caller.
 *
 * @param out where to store the removed data pointer; only written when the
 * key is found
 * @param bt tree to remove from
 * @param key key to remove
 * @return lsm_error_ok if the key was removed, lsm_error_not_found if it was
 * not present
 */
lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key) {
  // Walk down keeping a pointer to the link that references the current node,
  // so the node can be unhooked without tracking its parent. An empty tree
  // falls straight through to the not-found check.
  lsm_bt_node **link = &bt->root;

  while ((*link != NULL) && ((*link)->key != key)) {
    if (key < (*link)->key) {
      link = &(*link)->left;
    } else {
      link = &(*link)->right;
    }
  }

  lsm_bt_node *target = *link;

  if (target == NULL) {
    return lsm_error_not_found;
  }

  *out = target->data;
  bt->size--;

  // At most one child: splice that child (or NULL) into the target's place
  if ((target->left == NULL) || (target->right == NULL)) {
    *link = target->left != NULL ? target->left : target->right;
    lsm_bt_node_free(target);

    return lsm_error_ok;
  }

  // Two children: locate the in-order successor, i.e. the leftmost node of
  // the right subtree
  lsm_bt_node **succ_link = &target->right;

  while ((*succ_link)->left != NULL) {
    succ_link = &(*succ_link)->left;
  }

  lsm_bt_node *succ = *succ_link;

  // The target node stays in place and takes over the successor's payload;
  // the successor node is the one that gets unlinked and freed
  target->key = succ->key;
  target->data = succ->data;
  *succ_link = succ->right;
  lsm_bt_node_free(succ);

  return lsm_error_ok;
}

View File

@ -6,17 +6,18 @@
/**
* Initialize a new lsm_store struct.
*
* @param lsm_store pointer to where to store the newly allocated object's pointer
* @param lsm_store pointer to where to store the newly allocated object's
* pointer
* @return success of the function
*/
lsm_error lsm_store_init(lsm_store **ptr) {
lsm_store *store = calloc(1, sizeof(lsm_store));
/* lsm_error lsm_store_init(lsm_store **ptr) { */
/* lsm_store *store = calloc(1, sizeof(lsm_store)); */
if (store == NULL) {
return lsm_error_failed_alloc;
}
/* if (store == NULL) { */
/* return lsm_error_failed_alloc; */
/* } */
*ptr = store;
/* *ptr = store; */
return lsm_error_ok;
}
/* return lsm_error_ok; */
/* } */

View File

@ -1,7 +1,7 @@
#include <stdlib.h>
#include "lsm_store_node.h"
#include "lsm.h"
#include "lsm_store_node.h"
lsm_error lsm_store_inode_init(lsm_store_inode **ptr, const char c) {
lsm_store_inode *node = calloc(1, sizeof(lsm_store_inode));
@ -28,7 +28,8 @@ lsm_error lsm_store_node_init(lsm_store_node **ptr) {
return lsm_error_ok;
}
lsm_error lsm_store_node_search(lsm_store_node **out_ptr, lsm_store_node *node, const char c) {
lsm_error lsm_store_node_search(lsm_store_node **out_ptr, lsm_store_node *node,
const char c) {
if (node->size == 0) {
return lsm_error_not_found;
}

View File

@ -23,10 +23,10 @@ lsm_error lsm_store_inode_init(lsm_store_inode **ptr, const char c);
* binary tree.
*/
typedef struct lsm_store_node {
lsm_entry *entry;
/* lsm_entry *entry; */
lsm_store_inode *root;
uint8_t size;
lsm_string skip;
/* lsm_string skip; */
} lsm_store_node;
/**
@ -37,6 +37,7 @@ lsm_error lsm_store_node_init(lsm_store_node **out);
/**
* Search for the next node following the given character, if present.
*/
lsm_error lsm_store_node_search(lsm_store_node **out, lsm_store_node *node, const char c);
lsm_error lsm_store_node_search(lsm_store_node **out, lsm_store_node *node,
const char c);
#endif

View File

@ -0,0 +1,61 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "lsm.h"
#include "lsm/str_internal.h"
/**
 * Allocate a zero-filled lsm_str, i.e. a string of length 0.
 *
 * @param ptr where to store the new string; only written on success
 * @return lsm_error_ok on success, lsm_error_failed_alloc if the allocation
 * failed
 */
lsm_error lsm_str_init_zero(lsm_str **ptr) {
  lsm_str *fresh = calloc(1, sizeof *fresh);

  if (fresh != NULL) {
    *ptr = fresh;
    return lsm_error_ok;
  }

  return lsm_error_failed_alloc;
}
/**
 * Make an existing lsm_str represent the given NUL-terminated string, taking
 * ownership of the buffer.
 *
 * Strings longer than 8 bytes are stored by pointer. Shorter ones are copied
 * into the struct (without the terminator) and the buffer is freed right away.
 *
 * NOTE(review): a heap buffer previously held by `str` is not released here;
 * confirm that callers only pass empty or zeroed strings.
 *
 * @param str string object to fill in
 * @param s heap-allocated, NUL-terminated string; must not be used by the
 * caller afterwards
 */
void lsm_str_init_prealloc(lsm_str *str, char *s) {
  const uint64_t length = strlen(s);

  str->len = length;

  if (length > 8) {
    str->data.ptr = s;
    return;
  }

  memcpy(str->data.val, s, length);
  free(s);
}
/**
 * Allocate a new lsm_str and initialize it from the given string.
 *
 * @param ptr where to store the new string; only written on success
 * @param s heap-allocated, NUL-terminated string; ownership is taken over on
 * success. If the allocation fails, `s` is neither stored nor freed.
 * @return lsm_error_ok on success, lsm_error_failed_alloc if the allocation
 * failed
 */
lsm_error lsm_str_init(lsm_str **ptr, char *s) {
  lsm_str *fresh = calloc(1, sizeof *fresh);

  if (!fresh) {
    return lsm_error_failed_alloc;
  }

  lsm_str_init_prealloc(fresh, s);
  *ptr = fresh;

  return lsm_error_ok;
}
/**
 * Release the heap buffer if the string owns one and reset the object to an
 * empty string, wiping its contents.
 *
 * @param str string to wipe; the struct itself is not freed
 */
void lsm_str_zero(lsm_str *str) {
  if (str->len > 8) {
    free(str->data.ptr);
  }

  // Clear the whole storage union so neither a dangling pointer to the freed
  // buffer nor stale inline bytes survive the reset
  memset(&str->data, 0, sizeof str->data);
  str->len = 0;
}
/**
 * Free a heap-allocated lsm_str, including its character buffer when the
 * string is long enough (more than 8 bytes) to own one.
 *
 * @param str string to deallocate
 */
void lsm_str_free(lsm_str *str) {
  const int owns_buffer = str->len > 8;

  if (owns_buffer) {
    free(str->data.ptr);
  }

  free(str);
}
/**
 * Return the number of bytes stored in the string.
 *
 * @param str string to query
 * @return length of the string
 */
uint64_t lsm_str_len(lsm_str *str) {
  return str->len;
}

View File

@ -0,0 +1 @@
#include "lsm/trie_internal.h"

109
lsm/test/bt/bt.c 100644
View File

@ -0,0 +1,109 @@
#include "test.h"
#include "lsm/bt_internal.h"
/*
 * Declare a local `bt` in the calling test and initialize it as an empty
 * tree. This cannot be wrapped in do { } while (0) because the variable has
 * to stay visible to the rest of the test body.
 *
 * `bt` starts out as NULL: lsm_bt_init only writes the pointer on success, so
 * without the initializer a failed init would make the second check read an
 * indeterminate value.
 */
#define BT_INIT()                                                              \
  lsm_bt *bt = NULL;                                                           \
  TEST_CHECK(lsm_bt_init(&bt) == lsm_error_ok);                                \
  TEST_CHECK(bt != NULL)
/**
 * An empty tree can be created and freed again.
 */
void test_init() {
BT_INIT();
lsm_bt_free(bt);
}
/**
 * Inserting into an empty tree: the first insert succeeds, a second insert of
 * the same key is rejected, and only the inserted key can be found.
 */
void test_insert_first() {
BT_INIT();
TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_ok);
TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_already_present);
void *data;
TEST_CHECK(lsm_bt_search(&data, bt, 'a') == lsm_error_ok);
TEST_CHECK(data == (void *)1);
// A key that was never inserted must not be found
TEST_CHECK(lsm_bt_search(&data, bt, 'b') == lsm_error_not_found);
lsm_bt_free(bt);
}
/**
 * Two distinct keys can be inserted and looked up independently; duplicates of
 * either key are rejected.
 */
void test_insert_two() {
BT_INIT();
TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_ok);
TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_already_present);
TEST_CHECK(lsm_bt_insert(bt, 'b', (void *)2) == lsm_error_ok);
TEST_CHECK(lsm_bt_insert(bt, 'b', (void *)2) == lsm_error_already_present);
void *data;
// Each key must map to its own data pointer
TEST_CHECK(lsm_bt_search(&data, bt, 'a') == lsm_error_ok);
TEST_CHECK(data == (void *)1);
TEST_CHECK(lsm_bt_search(&data, bt, 'b') == lsm_error_ok);
TEST_CHECK(data == (void *)2);
TEST_CHECK(lsm_bt_search(&data, bt, 'c') == lsm_error_not_found);
lsm_bt_free(bt);
}
/**
 * Insert several keys in an order that produces both left and right links,
 * then verify that every key is present exactly once with the right data.
 */
void test_insert_multiple() {
  char chars[] = "falcoep";
  // sizeof counts the terminating NUL as well; leave it out so that only the
  // letters are used as keys
  size_t char_count = sizeof(chars) - 1;

  BT_INIT();

  // The data pointer encodes the 1-based position of the key
  for (size_t i = 0; i < char_count; i++) {
    TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_ok);
  }

  void *data;

  for (size_t i = 0; i < char_count; i++) {
    TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) ==
               lsm_error_already_present);
    TEST_CHECK(lsm_bt_search(&data, bt, chars[i]) == lsm_error_ok);
    TEST_CHECK(data == (void *)(i + 1));
  }

  lsm_bt_free(bt);
}
/**
 * Removing the only key returns its data pointer and leaves the tree without
 * a root.
 */
void test_remove_root() {
BT_INIT();
TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_ok);
void *data;
TEST_CHECK(lsm_bt_remove(&data, bt, 'a') == lsm_error_ok);
TEST_CHECK(data == (void *)1);
// The root link must have been cleared, not left pointing at the freed node
TEST_CHECK(bt->root == NULL);
lsm_bt_free(bt);
}
/**
 * Remove keys from a populated tree and check that a removed key cannot be
 * removed a second time.
 *
 * With this insertion order (and ASCII ordering) 'l' has a single child and
 * 'e' is a leaf, so the removal of a node with two children is not exercised
 * here.
 */
void test_remove_multiple() {
  char chars[] = "falcoep";
  // sizeof counts the terminating NUL as well; leave it out so that only the
  // letters are used as keys
  size_t char_count = sizeof(chars) - 1;

  BT_INIT();

  // The data pointer encodes the 1-based position of the key
  for (size_t i = 0; i < char_count; i++) {
    TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_ok);
  }

  void *data;

  TEST_CHECK(lsm_bt_remove(&data, bt, 'l') == lsm_error_ok);
  TEST_CHECK(data == (void *)3);
  TEST_CHECK(lsm_bt_remove(&data, bt, 'l') == lsm_error_not_found);

  TEST_CHECK(lsm_bt_remove(&data, bt, 'e') == lsm_error_ok);
  TEST_CHECK(data == (void *)6);
  TEST_CHECK(lsm_bt_remove(&data, bt, 'e') == lsm_error_not_found);

  lsm_bt_free(bt);
}
// Registry of the tests in this file as name/function pairs, terminated by a
// { NULL, NULL } entry. TEST_LIST itself comes from test.h.
TEST_LIST = {
{ "test init", test_init },
{ "test insert first", test_insert_first },
{ "test insert two", test_insert_two },
{ "test insert multiple", test_insert_multiple },
{ "test remove root", test_remove_root },
{ "test remove multiple", test_remove_multiple },
{ NULL, NULL }
};

1839
lsm/test/test.h 100644

File diff suppressed because it is too large Load Diff