From fd42b446a61a4c639b8c10bac71d08867e1f281b Mon Sep 17 00:00:00 2001
From: Chewing_Bever
Date: Thu, 12 Oct 2023 10:06:20 +0200
Subject: [PATCH] feat(lsm): start of library

---
 lsm/Makefile             | 95 ++++++++++++++++++++++++++++++++++++++++
 lsm/config.mk            | 13 ++++++
 lsm/include/lsm.h        | 67 ++++++++++++++++++++++++++++
 lsm/src/lsm_store.c      | 22 ++++++++++
 lsm/src/lsm_store.h      | 12 +++++
 lsm/src/lsm_store_node.c | 62 ++++++++++++++++++++++++++
 lsm/src/lsm_store_node.h | 42 ++++++++++++++++++
 src/main.c               |  2 +-
 8 files changed, 314 insertions(+), 1 deletion(-)
 create mode 100644 lsm/Makefile
 create mode 100644 lsm/config.mk
 create mode 100644 lsm/include/lsm.h
 create mode 100644 lsm/src/lsm_store.c
 create mode 100644 lsm/src/lsm_store.h
 create mode 100644 lsm/src/lsm_store_node.c
 create mode 100644 lsm/src/lsm_store_node.h

diff --git a/lsm/Makefile b/lsm/Makefile
new file mode 100644
index 0000000..5352623
--- /dev/null
+++ b/lsm/Makefile
@@ -0,0 +1,95 @@
+# https://spin.atomicobject.com/2016/08/26/makefile-c-projects/ was a great
+# base for this Makefile
+
+-include config.mk
+
+LIB := $(BUILD_DIR)/$(LIB_FILENAME)
+
+SRCS != find '$(SRC_DIR)' -iname '*.c'
+SRCS_H != find $(INC_DIRS) -iname '*.h'
+SRCS_H_INTERNAL != find $(SRC_DIR) -iname '*.h'
+SRCS_TEST != find '$(TEST_DIR)' -iname '*.c'
+
+OBJS := $(SRCS:%=$(BUILD_DIR)/%.o)
+OBJS_TEST := $(SRCS_TEST:%=$(BUILD_DIR)/%.o)
+DEPS := $(SRCS:%=$(BUILD_DIR)/%.d) $(SRCS_TEST:%=$(BUILD_DIR)/%.d)
+
+BINS_TEST := $(OBJS_TEST:%.c.o=%)
+TARGETS_TEST := $(BINS_TEST:%=test-%)
+TARGETS_MEM_TEST := $(BINS_TEST:%=test-mem-%)
+
+_CFLAGS := $(addprefix -I,$(INC_DIRS)) $(CFLAGS) -Wall -Wextra
+
+.PHONY: all
+all: lib
+
+
+# =====COMPILATION=====
+# Builds every object file without archiving; used by the CI to lint
+.PHONY: objs
+objs: $(OBJS)
+
+.PHONY: lib
+lib: $(LIB)
+$(LIB): $(OBJS)
+	ar -rcs $@ $(OBJS)
+
+$(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c
+	mkdir -p $(dir $@)
+	$(CC) -c $(_CFLAGS) $< -o $@
+
+
+# =====TESTING=====
+.PHONY: test
+test: $(TARGETS_TEST)
+
+.PHONY: test-mem
+test-mem: $(TARGETS_MEM_TEST)
+
+.PHONY: $(TARGETS_TEST)
+$(TARGETS_TEST): test-%: %
+	./$^
+
+.PHONY: $(TARGETS_MEM_TEST)
+$(TARGETS_MEM_TEST): test-mem-%: %
+	valgrind --tool=memcheck --error-exitcode=1 --track-origins=yes --leak-check=full ./$^
+
+.PHONY: build-test
+build-test: $(BINS_TEST)
+
+$(BINS_TEST): %: %.c.o $(LIB)
+	$(CC) \
+		$^ -o $@
+
+# Along with the include directory, each test includes $(TEST_DIR) (which
+# contains the acutest.h header file), and the src directory of the module it's
+# testing. This allows tests to access internal methods, which aren't publicly
+# exposed.
+$(BUILD_DIR)/$(TEST_DIR)/%.c.o: $(TEST_DIR)/%.c
+	mkdir -p $(dir $@)
+	$(CC) $(_CFLAGS) -I$(TEST_DIR) \
+		-I$(dir $(@:$(BUILD_DIR)/$(TEST_DIR)/%=$(SRC_DIR)/%)) \
+		-c $< -o $@
+
+# =====MAINTENANCE=====
+.PHONY: lint
+lint:
+	clang-format -n --Werror $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL)
+
+.PHONY: fmt
+fmt:
+	clang-format -i $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL)
+
+.PHONY: clean
+clean:
+	rm -rf $(BUILD_DIR)
+
+
+.PHONY: bear
+bear: clean
+	bear -- make
+	bear --append -- make build-test
+
+
+# Make make aware of the .d files; the leading '-' ignores missing ones
+-include $(DEPS)
diff --git a/lsm/config.mk b/lsm/config.mk
new file mode 100644
index 0000000..c453c6a
--- /dev/null
+++ b/lsm/config.mk
@@ -0,0 +1,13 @@
+LIB_FILENAME = liblsm.a
+
+BUILD_DIR = build
+SRC_DIR = src
+TEST_DIR = test
+INC_DIRS = include
+
+# -MMD: generate a .d file for every source file. This file can be imported by
+# make and makes make aware that a header file has been changed, ensuring an
+# object file is also recompiled if only a header is changed.
+# -MP: generate a dummy target for every header file (according to the docs it
+# prevents some errors when removing header files)
+CFLAGS = -MMD -MP -g
diff --git a/lsm/include/lsm.h b/lsm/include/lsm.h
new file mode 100644
index 0000000..6eb7fd0
--- /dev/null
+++ b/lsm/include/lsm.h
@@ -0,0 +1,67 @@
+#ifndef LSM
+#define LSM
+
+#include <stdint.h>
+
+#define LSM_MAX_SKIP_SIZE 8
+
+typedef enum lsm_error {
+  lsm_error_ok = 0,
+  lsm_error_failed_alloc = 1,
+  lsm_error_not_found = 2
+} lsm_error;
+
+/**
+ * Represents a string (or really any kind of data) with a known length. Data
+ * with length 8 or less is stored inline in str.val (sharing storage with
+ * str.ptr), and does not allocate additional memory.
+ */
+typedef struct lsm_string {
+  uint64_t len;
+  union {
+    void *ptr;
+    char val[8];
+  } str;
+} lsm_string;
+
+/**
+ * The type of an attribute. Each type is represented as a single bit of a
+ * 32-bit integer, so they can be easily combined into a bitmap.
+ */
+typedef enum lsm_attr_type {
+  lsm_attr_type_entry_type = 1 << 0
+} lsm_attr_type;
+
+/**
+ * A single attribute associated with an entry.
+ */
+typedef struct lsm_attr {
+  lsm_attr_type type;
+  lsm_string str;
+} lsm_attr;
+
+/**
+ * Represents a collection of attributes for an entry. A collection can only
+ * contain one of each attribute.
+ */
+typedef struct lsm_attr_list {
+  uint64_t count;
+  lsm_attr *items;
+  uint32_t bitmap;
+} lsm_attr_list;
+
+/**
+ * An entry inside an LSM store.
+ */
+typedef struct lsm_entry {
+  lsm_string key;
+  lsm_attr_list attrs;
+  lsm_string data;
+} lsm_entry;
+
+/**
+ * A store of entries, which manages its data both in-memory and on disk.
+ */
+typedef struct lsm_store lsm_store;
+
+#endif
diff --git a/lsm/src/lsm_store.c b/lsm/src/lsm_store.c
new file mode 100644
index 0000000..e167836
--- /dev/null
+++ b/lsm/src/lsm_store.c
@@ -0,0 +1,22 @@
+#include <stdlib.h>
+
+#include "lsm.h"
+#include "lsm_store.h"
+
+/**
+ * Initialize a new lsm_store struct.
+ *
+ * @param ptr where to store the pointer to the newly allocated store
+ * @return lsm_error_ok, or lsm_error_failed_alloc if the allocation fails
+ */
+lsm_error lsm_store_init(lsm_store **ptr) {
+  lsm_store *store = calloc(1, sizeof(*store));
+
+  if (store == NULL) {
+    return lsm_error_failed_alloc;
+  }
+  // The store starts out zeroed: NULL root and size 0.
+  *ptr = store;
+
+  return lsm_error_ok;
+}
diff --git a/lsm/src/lsm_store.h b/lsm/src/lsm_store.h
new file mode 100644
index 0000000..c73b2e9
--- /dev/null
+++ b/lsm/src/lsm_store.h
@@ -0,0 +1,12 @@
+#ifndef LSM_STORE_INTERNAL
+#define LSM_STORE_INTERNAL
+
+#include "lsm.h"
+#include "lsm_store_node.h"
+
+struct lsm_store {
+  lsm_store_node *root;
+  uint64_t size;
+};
+
+#endif
diff --git a/lsm/src/lsm_store_node.c b/lsm/src/lsm_store_node.c
new file mode 100644
index 0000000..63bc238
--- /dev/null
+++ b/lsm/src/lsm_store_node.c
@@ -0,0 +1,62 @@
+#include <stdlib.h>
+
+#include "lsm_store_node.h"
+#include "lsm.h"
+
+lsm_error lsm_store_inode_init(lsm_store_inode **ptr, const char c) {
+  lsm_store_inode *node = calloc(1, sizeof(*node));
+
+  if (node == NULL) {
+    return lsm_error_failed_alloc;
+  }
+  // calloc already zeroed left, right and next; only the key is set here.
+  node->key = c;
+  *ptr = node;
+
+  return lsm_error_ok;
+}
+
+lsm_error lsm_store_node_init(lsm_store_node **ptr) {
+  lsm_store_node *node = calloc(1, sizeof(*node));
+
+  if (node == NULL) {
+    return lsm_error_failed_alloc;
+  }
+  // All fields start zeroed: no entry, NULL root, size 0, zero-length skip.
+  *ptr = node;
+
+  return lsm_error_ok;
+}
+
+lsm_error lsm_store_node_search(lsm_store_node **out_ptr, lsm_store_node *node, const char c) {
+  if (node->size == 0 || node->root == NULL) {
+    return lsm_error_not_found;
+  }
+  // The guard above guarantees parent is non-NULL when the loop starts.
+  lsm_store_inode *parent = node->root;
+  lsm_store_inode *child;
+  lsm_store_node *out = NULL;
+  // Descend the inode binary tree, ordered by key, until c is found.
+  while (1) {
+    if (parent->key == c) {
+      out = parent->next;
+      break;
+    }
+
+    child = (c < parent->key) ? parent->left : parent->right;
+
+    if (child == NULL) {
+      break;
+    }
+
+    parent = child;
+  }
+  // A matching inode without a next node is reported as not found as well.
+  if (out == NULL) {
+    return lsm_error_not_found;
+  }
+
+  *out_ptr = out;
+
+  return lsm_error_ok;
+}
diff --git a/lsm/src/lsm_store_node.h b/lsm/src/lsm_store_node.h
new file mode 100644
index 0000000..7fbcff3
--- /dev/null
+++ b/lsm/src/lsm_store_node.h
@@ -0,0 +1,42 @@
+#ifndef LSM_STORE_NODE_INTERNAL
+#define LSM_STORE_NODE_INTERNAL
+
+#include "lsm.h"
+
+/**
+ * A node inside a store node's internal binary tree.
+ */
+typedef struct lsm_store_inode {
+  struct lsm_store_inode *left;
+  struct lsm_store_inode *right;
+  struct lsm_store_node *next;
+  char key;
+} lsm_store_inode;
+
+/**
+ * Allocate a zeroed lsm_store_inode with key c and store it in *ptr.
+ */
+lsm_error lsm_store_inode_init(lsm_store_inode **ptr, const char c);
+
+/**
+ * A node inside the store's trie structure. Internally, each node manages a
+ * binary tree.
+ */
+typedef struct lsm_store_node {
+  lsm_entry *entry;
+  lsm_store_inode *root;
+  uint8_t size;
+  lsm_string skip;
+} lsm_store_node;
+
+/**
+ * Allocate a zeroed lsm_store_node and store its pointer in *out.
+ */
+lsm_error lsm_store_node_init(lsm_store_node **out);
+
+/**
+ * Find the child of node keyed by c; *out is only written when it is found.
+ */
+lsm_error lsm_store_node_search(lsm_store_node **out, lsm_store_node *node, const char c);
+
+#endif
diff --git a/src/main.c b/src/main.c
index fa9d95e..f32f83f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -27,7 +27,7 @@ int main() {
 
   int port = atoi(port_str);
 
-  if (port <= 0 || port >= 2 << 16) {
+  if (port <= 0 || port >= 1 << 16) {
     critical(1, "Invalid TCP port %s", port_str);
   }
 