From b552e0a81b0babc8626cd15b0ed8682b1f4e14fe Mon Sep 17 00:00:00 2001
From: Chewing_Bever
Date: Wed, 25 Oct 2023 10:57:45 +0200
Subject: [PATCH] feat(lsm): some more string functions; start of data streaming api

---
 Makefile                              |  7 ++-
 config.mk                             |  6 +-
 lsm/include/lsm.h                     |  7 ++-
 lsm/include/lsm/store.h               | 43 +++++++++++++-
 lsm/include/lsm/str.h                 |  8 +++
 lsm/src/_include/lsm/store_internal.h | 15 ++++-
 lsm/src/store/lsm_store.c             | 81 ++++++++++++++++++++++++++-
 lsm/src/store/lsm_store_entry.c       | 12 ++--
 lsm/src/str/lsm_str.c                 | 37 ++++++++++++
 9 files changed, 199 insertions(+), 17 deletions(-)

diff --git a/Makefile b/Makefile
index ac7af46..1206b6c 100644
--- a/Makefile
+++ b/Makefile
@@ -39,8 +39,12 @@ objs: $(OBJS)
 libtrie:
 	$(MAKE) -C trie
 
+.PHONY: liblsm
+liblsm:
+	$(MAKE) -C lsm
+
 .PHONY: bin
-$(BIN): libtrie $(OBJS)
+$(BIN): libtrie liblsm $(OBJS)
 	$(CC) -o $@ $(OBJS) $(_LDFLAGS)
 
 $(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c
@@ -104,6 +108,7 @@ fmt:
 clean:
 	rm -rf $(BUILD_DIR)
 	$(MAKE) -C trie clean
+	$(MAKE) -C lsm clean
 
 
 .PHONY: bear
diff --git a/config.mk b/config.mk
index 78bdb30..8336cd1 100644
--- a/config.mk
+++ b/config.mk
@@ -7,9 +7,9 @@ SRC_DIR = src
 TEST_DIR = test
 THIRDPARTY_DIR = thirdparty
 
-INC_DIRS = include $(THIRDPARTY_DIR)/include trie/include
-LIBS = trie m
-LIB_DIRS = ./trie/build
+INC_DIRS = include $(THIRDPARTY_DIR)/include trie/include lsm/include
+LIBS = trie m lsm
+LIB_DIRS = ./trie/build ./lsm/build
 
 # -MMD: generate a .d file for every source file.
This file can be imported by
 # make and makes make aware that a header file has been changed, ensuring an
diff --git a/lsm/include/lsm.h b/lsm/include/lsm.h
index 8ecb958..f5ca373 100644
--- a/lsm/include/lsm.h
+++ b/lsm/include/lsm.h
@@ -3,7 +3,20 @@
 
 #include <stdint.h>
 
-#define LSM_MAX_SKIP_SIZE 8
+/**
+ * Evaluate x, an expression of type lsm_error, exactly once. If the result is
+ * not lsm_error_ok, return it from the enclosing function.
+ *
+ * The do/while (0) wrapper turns `LSM_RES(x);` into a single statement, so the
+ * macro also works as the body of an unbraced if/else.
+ */
+#define LSM_RES(x)                  \
+  do {                              \
+    lsm_error lsm_res_ = (x);       \
+    if (lsm_res_ != lsm_error_ok) { \
+      return lsm_res_;              \
+    }                               \
+  } while (0)
 
 typedef enum lsm_error {
   lsm_error_ok = 0,
diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h
index e542f9d..7edcc03 100644
--- a/lsm/include/lsm/store.h
+++ b/lsm/include/lsm/store.h
@@ -7,6 +7,8 @@
 #include "lsm.h"
 #include "lsm/str.h"
 
+#define LSM_STORE_DISK_THRESHOLD 1024
+
 /**
  * The type of an entry attribute.
  *
@@ -73,7 +75,7 @@ lsm_error lsm_entry_attr_insert(lsm_entry *entry, lsm_attr_type type,
  * Remove an atribute from the given entry, if present.
  *
  * @param out pointer to store removed data pointer in. If NULL, data pointer
- * can get leaked.
+ * will be leaked.
  * @param entry entry to remove attribute from
  * @param type type of attribute to remove
  */
@@ -95,6 +97,15 @@ typedef struct lsm_store lsm_store;
  */
 lsm_error lsm_store_init(lsm_store **ptr);
 
+/**
+ * Open the given database file and load it into a new store object.
+ *
+ * @param ptr pointer to store newly allocated store
+ * @param db_path path to the database file
+ * @param data_path path to the data directory
+ */
+lsm_error lsm_store_open(lsm_store **ptr, char *db_path, char *data_path);
+
 /**
  * Dealocate an existing lsm_store object.
  *
@@ -102,4 +113,34 @@ lsm_error lsm_store_init(lsm_store **ptr);
  */
 void lsm_store_free(lsm_store *store);
 
+/**
+ * Search for an entry in the store.
+ *
+ * @param out pointer to store entry pointer in
+ * @param store store to search in
+ * @param key key to look with
+ */
+lsm_error lsm_store_search(lsm_entry **out, lsm_store *store, lsm_str *key);
+
+/**
+ * Allocate a new entry in the store with the specified key.
+ *
+ * @param out pointer to store new entry pointer in
+ * @param store store to modify
+ * @param key key to add; ownership of key pointer is taken over
+ */
+lsm_error lsm_store_insert(lsm_entry **out, lsm_store *store, lsm_str *key);
+
+/**
+ * Append new data to the given entry, which is expected to be in the store.
+ *
+ * This function will append either to disk or to memory, depending on the
+ * length of the entry's data.
+ *
+ * @param store store the entry is stored in
+ * @param entry entry to append data to
+ * @param data data to append
+ */
+lsm_error lsm_store_data_append(lsm_store *store, lsm_entry *entry, lsm_str *data);
+
 #endif
diff --git a/lsm/include/lsm/str.h b/lsm/include/lsm/str.h
index 52659c1..bc18fa9 100644
--- a/lsm/include/lsm/str.h
+++ b/lsm/include/lsm/str.h
@@ -151,4 +151,12 @@ lsm_error lsm_str_truncate(lsm_str *s, uint64_t new_len);
  */
 lsm_error lsm_str_split(lsm_str *s, lsm_str *s2, uint64_t index);
 
+/**
+ * Append s2 to s. s2 is left untouched.
+ *
+ * @param s string to append s2 to
+ * @param s2 string to append to s
+ */
+lsm_error lsm_str_append(lsm_str *s, lsm_str *s2);
+
 #endif
diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h
index b4a833c..e77e879 100644
--- a/lsm/src/_include/lsm/store_internal.h
+++ b/lsm/src/_include/lsm/store_internal.h
@@ -1,6 +1,8 @@
 #ifndef LSM_STORE_INTERNAL
 #define LSM_STORE_INTERNAL
 
+#include <stdio.h>
+
 #include "lsm/store.h"
 #include "lsm/str_internal.h"
 #include "lsm/trie.h"
@@ -11,17 +13,33 @@ typedef struct lsm_attr {
 } lsm_attr;
 
 struct lsm_entry {
-  lsm_str key;
+  // Key of the entry; lsm_store_insert stores the caller's pointer here
+  lsm_str *key;
   struct {
     uint64_t count;
     uint64_t bitmap;
     lsm_attr *items;
   } attrs;
-  lsm_str data;
+  // Payload appended through lsm_store_data_append
+  struct {
+    // Number of payload bytes stored so far
+    uint64_t len;
+    union {
+      // NOTE(review): presumably the backing file once on_disk is set; no
+      // code using it is part of this patch yet
+      FILE *file;
+      // Heap buffer of len bytes, used while on_disk is false
+      char *ptr;
+    } value;
+    bool on_disk;
+  } data;
 };
 
 struct lsm_store {
   lsm_trie *trie;
+  // Paths passed to lsm_store_open; the pointers are stored, not copied
+  char *data_path;
+  char *db_path;
 };
 
 #endif
diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c
index 8ba5740..d69f42d 100644
---
a/lsm/src/store/lsm_store.c
+++ b/lsm/src/store/lsm_store.c
@@ -1,6 +1,105 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "lsm.h"
 #include "lsm/store.h"
+#include "lsm/trie.h"
 #include "lsm/store_internal.h"
 
-/* lsm_error lsm_store_init(lsm_store **ptr) { */
-/*   lsm_store *store =  */
-/* } */
+lsm_error lsm_store_init(lsm_store **ptr) {
+  lsm_store *store = calloc(1, sizeof(lsm_store));
+
+  if (store == NULL) {
+    return lsm_error_failed_alloc;
+  }
+
+  lsm_error res = lsm_trie_init(&store->trie);
+
+  if (res != lsm_error_ok) {
+    free(store);
+
+    return res;
+  }
+
+  *ptr = store;
+
+  return lsm_error_ok;
+}
+
+lsm_error lsm_store_open(lsm_store **ptr, char *db_path, char *data_path) {
+  lsm_store *store;
+  LSM_RES(lsm_store_init(&store));
+
+  // TODO implement all of reading the db file
+
+  // The path pointers are stored as-is, not copied: the caller has to keep
+  // them valid for as long as the store is in use
+  store->db_path = db_path;
+  store->data_path = data_path;
+
+  *ptr = store;
+
+  return lsm_error_ok;
+}
+
+lsm_error lsm_store_search(lsm_entry **out, lsm_store *store, lsm_str *key) {
+  return lsm_trie_search((void **)out, store->trie, key);
+}
+
+lsm_error lsm_store_insert(lsm_entry **out, lsm_store *store, lsm_str *key) {
+  lsm_entry *entry;
+
+  LSM_RES(lsm_entry_init(&entry));
+
+  lsm_error res = lsm_trie_insert(store->trie, key, entry);
+
+  if (res != lsm_error_ok) {
+    // The trie did not take the entry, so release it here instead of leaking
+    // NOTE(review): assumes lsm_entry_init makes a single heap allocation;
+    // call the entry destructor instead if one exists
+    free(entry);
+
+    return res;
+  }
+
+  entry->key = key;
+  *out = entry;
+
+  return lsm_error_ok;
+}
+
+lsm_error lsm_store_data_append(lsm_store *store, lsm_entry *entry, lsm_str *data) {
+  uint64_t data_len = lsm_str_len(data);
+
+  // Nothing to append; also avoids calling realloc with a size of zero
+  if (data_len == 0) {
+    return lsm_error_ok;
+  }
+
+  uint64_t new_len = entry->data.len + data_len;
+
+  // Data is in memory and still fits -> keep it in memory
+  if ((new_len <= LSM_STORE_DISK_THRESHOLD) && (!entry->data.on_disk)) {
+    char *buf = realloc(entry->data.value.ptr, new_len * sizeof(char));
+
+    if (buf == NULL) {
+      return lsm_error_failed_alloc;
+    }
+
+    memcpy(&buf[entry->data.len], lsm_str_ptr(data), data_len);
+    entry->data.value.ptr = buf;
+    entry->data.len = new_len;
+
+    return lsm_error_ok;
+  }
+
+  // Data will end up on disk
+  // TODO create the data file if the entry is not on disk yet and append to
+  // it. Until then this path reports failure, so the caller never reads an
+  // indeterminate return value and never believes dropped data was stored.
+  // NOTE(review): lsm_error_failed_alloc is a stand-in; a dedicated error code
+  // would describe this case better
+  (void)store;
+
+  return lsm_error_failed_alloc;
+}
diff --git
a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c
index aa1c7fc..a64614e 100644
--- a/lsm/src/store/lsm_store_entry.c
+++ b/lsm/src/store/lsm_store_entry.c
@@ -27,15 +27,15 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry *entry,
     return lsm_error_not_found;
   }
 
-  for (uint64_t i = 0; i < entry->attrs.count; i++) {
-    if (entry->attrs.items[i].type == type) {
-      *out = entry->attrs.items[i].str;
+  uint64_t i = 0;
 
-      return lsm_error_ok;
-    }
+  while (entry->attrs.items[i].type != type) {
+    i++;
   }
 
-  return lsm_error_not_found;
+  *out = entry->attrs.items[i].str;
+
+  return lsm_error_ok;
 }
 
 lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry *entry,
diff --git a/lsm/src/str/lsm_str.c b/lsm/src/str/lsm_str.c
index 2244e52..aea5e02 100644
--- a/lsm/src/str/lsm_str.c
+++ b/lsm/src/str/lsm_str.c
@@ -196,3 +196,57 @@ bool lsm_str_eq(lsm_str *s1, lsm_str *s2) {
 
   return memcmp(lsm_str_ptr(s1), lsm_str_ptr(s2), s1->len) == 0;
 }
+
+/**
+ * Append the bytes of s2 to the end of s. s2 is left untouched.
+ *
+ * Strings of up to 8 bytes live inline in data.val and longer ones in a heap
+ * buffer at data.ptr, so growing past 8 bytes moves the inline bytes to the
+ * heap. s and s2 may be the same string, which doubles it.
+ *
+ * @param s string to append s2 to
+ * @param s2 string to append to s
+ * @return lsm_error_ok on success, or lsm_error_failed_alloc if the buffer
+ * could not be grown, in which case s is unchanged
+ */
+lsm_error lsm_str_append(lsm_str *s, lsm_str *s2) {
+  if (s2->len == 0) {
+    return lsm_error_ok;
+  }
+
+  uint64_t new_len = s->len + s2->len;
+
+  if (new_len <= 8) {
+    memcpy(&s->data.val[s->len], s2->data.val, s2->len);
+  } else {
+    char *buf;
+
+    if (s->len <= 8) {
+      buf = malloc(new_len * sizeof(char));
+
+      if (buf == NULL) {
+        return lsm_error_failed_alloc;
+      }
+
+      memcpy(buf, s->data.val, s->len);
+    } else {
+      buf = realloc(s->data.ptr, new_len * sizeof(char));
+
+      if (buf == NULL) {
+        return lsm_error_failed_alloc;
+      }
+    }
+
+    // On a self-append realloc may already have released the storage that
+    // s2 still points at; the bytes to duplicate are then the first s->len
+    // bytes of buf
+    const char *src = (s == s2) ? buf : lsm_str_ptr(s2);
+
+    memcpy(&buf[s->len], src, s2->len);
+    s->data.ptr = buf;
+  }
+
+  s->len = new_len;
+
+  return lsm_error_ok;
+}