feat(lsm): some more string functions; start of data streaming api

lsm
Jef Roosens 2023-10-25 10:57:45 +02:00
parent fca8495de4
commit b552e0a81b
Signed by: Jef Roosens
GPG Key ID: B75D4F293C7052DB
9 changed files with 199 additions and 17 deletions

View File

@ -39,8 +39,12 @@ objs: $(OBJS)
libtrie: libtrie:
$(MAKE) -C trie $(MAKE) -C trie
.PHONY: liblsm
liblsm:
$(MAKE) -C lsm
.PHONY: bin .PHONY: bin
$(BIN): libtrie $(OBJS) $(BIN): libtrie liblsm $(OBJS)
$(CC) -o $@ $(OBJS) $(_LDFLAGS) $(CC) -o $@ $(OBJS) $(_LDFLAGS)
$(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c $(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c
@ -104,6 +108,7 @@ fmt:
clean: clean:
rm -rf $(BUILD_DIR) rm -rf $(BUILD_DIR)
$(MAKE) -C trie clean $(MAKE) -C trie clean
$(MAKE) -C lsm clean
.PHONY: bear .PHONY: bear

View File

@ -7,9 +7,9 @@ SRC_DIR = src
TEST_DIR = test TEST_DIR = test
THIRDPARTY_DIR = thirdparty THIRDPARTY_DIR = thirdparty
INC_DIRS = include $(THIRDPARTY_DIR)/include trie/include INC_DIRS = include $(THIRDPARTY_DIR)/include trie/include lsm/include
LIBS = trie m LIBS = trie m lsm
LIB_DIRS = ./trie/build LIB_DIRS = ./trie/build ./lsm/build
# -MMD: generate a .d file for every source file. This file can be imported by # -MMD: generate a .d file for every source file. This file can be imported by
# make and makes make aware that a header file has been changed, ensuring an # make and makes make aware that a header file has been changed, ensuring an

View File

@ -3,7 +3,12 @@
#include <stdint.h> #include <stdint.h>
/**
 * LSM_RES(x): evaluate x once, and if the result is not lsm_error_ok, return
 * that result from the enclosing function.
 *
 * NOTE(review): the expansion is a bare brace block that declares a local
 * named `res`, so an argument expression that itself mentions `res` would
 * refer to the macro's own variable. Because it is not wrapped in
 * do { } while (0), `LSM_RES(...);` directly followed by `else` will not
 * parse.
 */
#define LSM_MAX_SKIP_SIZE 8 #define LSM_RES(x) \
{ \
lsm_error res = x; \
if (res != lsm_error_ok) \
return res; \
}
typedef enum lsm_error { typedef enum lsm_error {
lsm_error_ok = 0, lsm_error_ok = 0,

View File

@ -7,6 +7,8 @@
#include "lsm.h" #include "lsm.h"
#include "lsm/str.h" #include "lsm/str.h"
#define LSM_STORE_DISK_THRESHOLD 1024
/** /**
* The type of an entry attribute. * The type of an entry attribute.
* *
@ -73,7 +75,7 @@ lsm_error lsm_entry_attr_insert(lsm_entry *entry, lsm_attr_type type,
* Remove an attribute from the given entry, if present. * Remove an attribute from the given entry, if present.
* *
* @param out pointer to store removed data pointer in. If NULL, data pointer * @param out pointer to store removed data pointer in. If NULL, data pointer
* can get leaked. * will be leaked.
* @param entry entry to remove attribute from * @param entry entry to remove attribute from
* @param type type of attribute to remove * @param type type of attribute to remove
*/ */
@ -95,6 +97,15 @@ typedef struct lsm_store lsm_store;
*/ */
lsm_error lsm_store_init(lsm_store **ptr); lsm_error lsm_store_init(lsm_store **ptr);
/**
 * Create a new store object for the given database file and data directory.
 *
 * NOTE: reading the database file is not implemented yet; for now this
 * initialises a store through lsm_store_init and records both paths on it.
 * The path pointers are stored as-is (they are not copied), so the caller
 * presumably has to keep them valid for as long as the store is in use.
 *
 * @param ptr pointer to store newly allocated store; only written on success
 * @param db_path path to the database file
 * @param data_path path to the data directory
 * @return lsm_error_ok on success, otherwise the error reported by
 * lsm_store_init
 */
lsm_error lsm_store_open(lsm_store **ptr, char *db_path, char *data_path);
/** /**
* Deallocate an existing lsm_store object. * Deallocate an existing lsm_store object.
* *
@ -102,4 +113,34 @@ lsm_error lsm_store_init(lsm_store **ptr);
*/ */
void lsm_store_free(lsm_store *store); void lsm_store_free(lsm_store *store);
/**
 * Search for an entry in the store.
 *
 * The lookup is delegated to the store's trie.
 *
 * @param out pointer to store entry pointer in
 * @param store store to search in
 * @param key key to look up
 * @return the result of the trie lookup
 */
lsm_error lsm_store_search(lsm_entry **out, lsm_store *store, lsm_str *key);
/**
 * Allocate a new entry in the store with the specified key.
 *
 * @param out pointer to store new entry pointer in; only written on success
 * @param store store to modify
 * @param key key to add; ownership of key pointer is taken over
 * @return lsm_error_ok on success, otherwise the error reported while
 * creating the entry or inserting it into the store's trie
 */
lsm_error lsm_store_insert(lsm_entry **out, lsm_store *store, lsm_str *key);
/**
 * Append new data to the given entry, which is expected to be in the store.
 *
 * The data is kept in memory as long as the entry is not already stored on
 * disk and its total length after appending does not exceed
 * LSM_STORE_DISK_THRESHOLD. Otherwise the data is meant to be written to
 * disk.
 *
 * @param store store the entry is stored in
 * @param entry entry to append data to
 * @param data data to append; its contents are copied
 * @return lsm_error_failed_alloc if the in-memory buffer could not be grown
 */
lsm_error lsm_store_data_append(lsm_store *store, lsm_entry *entry, lsm_str *data);
#endif #endif

View File

@ -151,4 +151,12 @@ lsm_error lsm_str_truncate(lsm_str *s, uint64_t new_len);
*/ */
lsm_error lsm_str_split(lsm_str *s, lsm_str *s2, uint64_t index); lsm_error lsm_str_split(lsm_str *s, lsm_str *s2, uint64_t index);
/**
 * Append s2 to s. s2 is left untouched.
 *
 * Appending an empty s2 is a no-op. If growing s requires memory that cannot
 * be allocated, s is left unchanged.
 *
 * @param s string to append s2 to
 * @param s2 string to append to s
 * @return lsm_error_ok on success, lsm_error_failed_alloc if the required
 * memory could not be allocated
 */
lsm_error lsm_str_append(lsm_str *s, lsm_str *s2);
#endif #endif

View File

@ -1,6 +1,8 @@
#ifndef LSM_STORE_INTERNAL #ifndef LSM_STORE_INTERNAL
#define LSM_STORE_INTERNAL #define LSM_STORE_INTERNAL
#include <stdio.h>
#include "lsm/store.h" #include "lsm/store.h"
#include "lsm/str_internal.h" #include "lsm/str_internal.h"
#include "lsm/trie.h" #include "lsm/trie.h"
@ -11,17 +13,26 @@ typedef struct lsm_attr {
} lsm_attr; } lsm_attr;
/**
 * A single entry of the store: its key, its attributes and its data.
 */
struct lsm_entry {
lsm_str key; lsm_str *key;
struct {
uint64_t count;
uint64_t bitmap;
lsm_attr *items;
} attrs;
lsm_str data; struct {
// Number of chars of data currently held by the entry
uint64_t len;
// Storage for the data; on_disk tells which member is in use
union {
// NOTE(review): presumably used once on_disk is set; the code that
// would open this file is not implemented yet
FILE *file;
// Heap buffer holding the data while on_disk is false
char *ptr;
} value;
bool on_disk;
} data;
};
/**
 * Internal state of a store.
 */
struct lsm_store { struct lsm_store {
// Trie that maps keys to their entries
lsm_trie *trie; lsm_trie *trie;
// Path to the data directory, as handed to lsm_store_open
char *data_path;
// Path to the database file, as handed to lsm_store_open
char *db_path;
}; };
#endif #endif

View File

@ -1,6 +1,81 @@
#include <stdlib.h>
#include <string.h>
#include "lsm.h"
#include "lsm/store.h" #include "lsm/store.h"
#include "lsm/trie.h"
#include "lsm/store_internal.h" #include "lsm/store_internal.h"
/**
 * Allocate a zero-initialised store and initialise its trie.
 *
 * On failure nothing is written to *ptr and no memory stays allocated.
 */
/* lsm_error lsm_store_init(lsm_store **ptr) { */ lsm_error lsm_store_init(lsm_store **ptr) {
/* lsm_store *store = */ lsm_store *store = calloc(1, sizeof(lsm_store));
/* } */
if (store == NULL) {
return lsm_error_failed_alloc;
}
lsm_error res = lsm_trie_init(&store->trie);
// Don't leak the half-built store if the trie could not be set up
if (res != lsm_error_ok) {
free(store);
return res;
}
// Only hand the store to the caller once it is fully initialised
*ptr = store;
return lsm_error_ok;
}
/**
 * Build a store for the given database file and data directory.
 *
 * Loading the database file itself is still to be done; for now the two
 * paths are only recorded on a freshly initialised store.
 */
lsm_error lsm_store_open(lsm_store **ptr, char *db_path, char *data_path) {
  lsm_store *new_store;
  lsm_error init_res = lsm_store_init(&new_store);

  // Initialisation failed: pass the error on and leave *ptr alone
  if (init_res != lsm_error_ok) {
    return init_res;
  }

  // TODO implement all of reading the db file
  new_store->data_path = data_path;
  new_store->db_path = db_path;

  *ptr = new_store;

  return lsm_error_ok;
}
/**
 * Look up key in the store by delegating to the store's trie; the trie
 * writes the found entry pointer through out.
 */
lsm_error lsm_store_search(lsm_entry **out, lsm_store *store, lsm_str *key) {
  // The trie stores untyped pointers, so hand it out as a void ** slot
  void **slot = (void **)out;
  lsm_trie *trie = store->trie;

  return lsm_trie_search(slot, trie, key);
}
/**
 * Create a fresh entry, register it in the store's trie under key and hand
 * it back through out. *out is only written when everything succeeded.
 */
lsm_error lsm_store_insert(lsm_entry **out, lsm_store *store, lsm_str *key) {
  lsm_entry *new_entry;
  lsm_error status = lsm_entry_init(&new_entry);

  if (status != lsm_error_ok) {
    return status;
  }

  // NOTE(review): if the trie insert fails, new_entry is not released here.
  // Confirm whether an entry destructor should be called on this path.
  status = lsm_trie_insert(store->trie, key, new_entry);

  if (status != lsm_error_ok) {
    return status;
  }

  // The key is attached only after the trie accepted the entry
  new_entry->key = key;
  *out = new_entry;

  return lsm_error_ok;
}
/**
 * Append data to the given entry.
 *
 * While the entry is not on disk and its total length stays within
 * LSM_STORE_DISK_THRESHOLD, the data is copied into the entry's heap buffer.
 * Storing data on disk is not implemented yet; such appends are rejected
 * instead of being silently dropped.
 *
 * @param store store the entry is stored in (unused until the on-disk path
 * exists)
 * @param entry entry to append data to
 * @param data data to append; its contents are copied
 * @return lsm_error_ok if the data was appended (or data was empty),
 * lsm_error_failed_alloc if the buffer could not be grown or the data would
 * have to go to disk
 */
lsm_error lsm_store_data_append(lsm_store *store, lsm_entry *entry, lsm_str *data) {
  // Not needed until the on-disk path is implemented
  (void)store;

  uint64_t data_len = lsm_str_len(data);

  // Nothing to append; this also avoids ever issuing a zero-sized realloc
  if (data_len == 0) {
    return lsm_error_ok;
  }

  uint64_t new_len = entry->data.len + data_len;

  // Data is in memory and still fits -> keep it in memory
  if ((new_len <= LSM_STORE_DISK_THRESHOLD) && (!entry->data.on_disk)) {
    // realloc into a temporary so the old buffer survives a failed allocation
    char *buf = realloc(entry->data.value.ptr, new_len * sizeof(char));

    if (buf == NULL) {
      return lsm_error_failed_alloc;
    }

    memcpy(&buf[entry->data.len], lsm_str_ptr(data), data_len);

    entry->data.value.ptr = buf;
    entry->data.len = new_len;

    return lsm_error_ok;
  }

  // Data will end up on disk
  // TODO create the data file if the entry is not yet on disk, then append to
  // it. Until that exists, report a failure rather than pretending the data
  // was stored. NOTE(review): no dedicated "not implemented" error code is
  // visible here, so the allocation error is reused.
  return lsm_error_failed_alloc;
}

View File

@ -27,16 +27,16 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry *entry,
return lsm_error_not_found; return lsm_error_not_found;
} }
for (uint64_t i = 0; i < entry->attrs.count; i++) { uint64_t i = 0;
if (entry->attrs.items[i].type == type) {
while (entry->attrs.items[i].type != type) {
i++;
}
*out = entry->attrs.items[i].str; *out = entry->attrs.items[i].str;
return lsm_error_ok; return lsm_error_ok;
} }
}
return lsm_error_not_found;
}
lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry *entry, lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry *entry,
lsm_attr_type type) { lsm_attr_type type) {

View File

@ -196,3 +196,40 @@ bool lsm_str_eq(lsm_str *s1, lsm_str *s2) {
return memcmp(lsm_str_ptr(s1), lsm_str_ptr(s2), s1->len) == 0; return memcmp(lsm_str_ptr(s1), lsm_str_ptr(s2), s1->len) == 0;
} }
/**
 * Append the contents of s2 to s. s2 is left untouched.
 *
 * Strings of at most 8 chars are stored inline in the string itself; longer
 * ones live in a heap buffer. Appending may therefore move s from inline
 * storage to a newly allocated buffer, or grow its existing buffer.
 *
 * Passing the same string as both s and s2 is supported.
 *
 * @param s string to append s2 to
 * @param s2 string to append to s
 * @return lsm_error_ok on success, lsm_error_failed_alloc if the required
 * memory could not be allocated (s is unchanged in that case)
 */
lsm_error lsm_str_append(lsm_str *s, lsm_str *s2) {
  if (s2->len == 0) {
    return lsm_error_ok;
  }

  // Snapshot both lengths up front: s and s2 may be the same object
  const uint64_t old_len = s->len;
  const uint64_t add_len = s2->len;
  const uint64_t new_len = old_len + add_len;

  if (new_len <= 8) {
    // Result still fits inline, which implies both inputs are inline as well
    memcpy(&s->data.val[old_len], s2->data.val, add_len);
  } else {
    char *buf;

    if (old_len <= 8) {
      // s is inline right now: move its contents into a fresh heap buffer
      buf = malloc(new_len * sizeof(char));

      if (buf == NULL) {
        return lsm_error_failed_alloc;
      }

      memcpy(buf, s->data.val, old_len);
    } else {
      // realloc into a temporary so s stays valid if the allocation fails
      buf = realloc(s->data.ptr, new_len * sizeof(char));

      if (buf == NULL) {
        return lsm_error_failed_alloc;
      }
    }

    // When appending a string to itself, its old heap buffer may just have
    // been moved by realloc; the first old_len chars of buf hold the same
    // contents, so read from there instead of a possibly stale pointer.
    const char *src = (s2 == s) ? buf : lsm_str_ptr(s2);

    memcpy(&buf[old_len], src, add_len);
    s->data.ptr = buf;
  }

  s->len = new_len;

  return lsm_error_ok;
}