From fd42b446a61a4c639b8c10bac71d08867e1f281b Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Thu, 12 Oct 2023 10:06:20 +0200 Subject: [PATCH 01/70] feat(lsm): start of library --- lsm/Makefile | 95 ++++++++++++++++++++++++++++++++++++++++ lsm/config.mk | 13 ++++++ lsm/include/lsm.h | 67 ++++++++++++++++++++++++++++ lsm/src/lsm_store.c | 22 ++++++++++ lsm/src/lsm_store.h | 12 +++++ lsm/src/lsm_store_node.c | 62 ++++++++++++++++++++++++++ lsm/src/lsm_store_node.h | 42 ++++++++++++++++++ src/main.c | 2 +- 8 files changed, 314 insertions(+), 1 deletion(-) create mode 100644 lsm/Makefile create mode 100644 lsm/config.mk create mode 100644 lsm/include/lsm.h create mode 100644 lsm/src/lsm_store.c create mode 100644 lsm/src/lsm_store.h create mode 100644 lsm/src/lsm_store_node.c create mode 100644 lsm/src/lsm_store_node.h diff --git a/lsm/Makefile b/lsm/Makefile new file mode 100644 index 0000000..5352623 --- /dev/null +++ b/lsm/Makefile @@ -0,0 +1,95 @@ +# https://spin.atomicobject.com/2016/08/26/makefile-c-projects/ was a great +# base for this Makefile + +-include config.mk + +LIB := $(BUILD_DIR)/$(LIB_FILENAME) + +SRCS != find '$(SRC_DIR)' -iname '*.c' +SRCS_H != find $(INC_DIRS) -iname '*.h' +SRCS_H_INTERNAL != find $(SRC_DIR) -iname '*.h' +SRCS_TEST != find '$(TEST_DIR)' -iname '*.c' + +OBJS := $(SRCS:%=$(BUILD_DIR)/%.o) +OBJS_TEST := $(SRCS_TEST:%=$(BUILD_DIR)/%.o) +DEPS := $(SRCS:%=$(BUILD_DIR)/%.d) $(SRCS_TEST:%=$(BUILD_DIR)/%.d) + +BINS_TEST := $(OBJS_TEST:%.c.o=%) +TARGETS_TEST := $(BINS_TEST:%=test-%) +TARGETS_MEM_TEST := $(BINS_TEST:%=test-mem-%) + +_CFLAGS := $(addprefix -I,$(INC_DIRS)) $(CFLAGS) -Wall -Wextra + +.PHONY: all +all: lib + + +# =====COMPILATION===== +# Utility used by the CI to lint +.PHONY: objs +objs: $(OBJS) + +.PHONY: lib +lib: $(LIB) +$(LIB): $(OBJS) + ar -rcs $@ $(OBJS) + +$(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c + mkdir -p $(dir $@) + $(CC) -c $(_CFLAGS) $< -o $@ + + +# =====TESTING===== +.PHONY: test +test: $(TARGETS_TEST) + 
+.PHONY: test-mem +test-mem: $(TARGETS_MEM_TEST) + +.PHONY: $(TARGETS_TEST) +$(TARGETS_TEST): test-%: % + ./$^ + +.PHONY: $(TARGETS_MEM_TEST) +$(TARGETS_MEM_TEST): test-mem-%: % + valgrind --tool=memcheck --error-exitcode=1 --track-origins=yes --leak-check=full ./$^ + +.PHONY: build-test +build-test: $(BINS_TEST) + +$(BINS_TEST): %: %.c.o $(LIB) + $(CC) \ + $^ -o $@ + +# Along with the include directory, each test includes $(TEST_DIR) (which +# contains the acutest.h header file), and the src directory of the module it's +# testing. This allows tests to access internal methods, which aren't publicly +# exposed. +$(BUILD_DIR)/$(TEST_DIR)/%.c.o: $(TEST_DIR)/%.c + mkdir -p $(dir $@) + $(CC) $(_CFLAGS) -I$(TEST_DIR) \ + -I$(dir $(@:$(BUILD_DIR)/$(TEST_DIR)/%=$(SRC_DIR)/%)) \ + -c $< -o $@ + +# =====MAINTENANCE===== +.PHONY: lint +lint: + clang-format -n --Werror $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) + +.PHONY: fmt +fmt: + clang-format -i $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) + +.PHONY: clean +clean: + rm -rf $(BUILD_DIR) + + +.PHONY: bear +bear: clean + bear -- make + bear --append -- make build-test + + +# Make make aware of the .d files +-include $(DEPS) diff --git a/lsm/config.mk b/lsm/config.mk new file mode 100644 index 0000000..c453c6a --- /dev/null +++ b/lsm/config.mk @@ -0,0 +1,13 @@ +LIB_FILENAME = liblsm.a + +BUILD_DIR = build +SRC_DIR = src +TEST_DIR = test +INC_DIRS = include + +# -MMD: generate a .d file for every source file. This file can be imported by +# make and makes make aware that a header file has been changed, ensuring an +# object file is also recompiled if only a header is changed. 
+# -MP: generate a dummy target for every header file (according to the docs it +# prevents some errors when removing header files) +CFLAGS = -MMD -MP -g diff --git a/lsm/include/lsm.h b/lsm/include/lsm.h new file mode 100644 index 0000000..6eb7fd0 --- /dev/null +++ b/lsm/include/lsm.h @@ -0,0 +1,67 @@ +#ifndef LSM +#define LSM + +#include <stdint.h> + +#define LSM_MAX_SKIP_SIZE 8 + +typedef enum lsm_error { + lsm_error_ok = 0, + lsm_error_failed_alloc = 1, + lsm_error_not_found = 2 +} lsm_error; + +/** + * Represents a string (or really any kind of data) with a known length. Data + * with length 8 or less is stored inside the pointer, and does not allocate + * additional memory. + */ +typedef struct lsm_string { + uint64_t len; + union { + void *ptr; + char val[8]; + } str; +} lsm_string; + +/** + * The type of an attribute. Each type is represented as a single bit of a + * 32-bit integer, so they can be easily combined into a bitmap. + */ +typedef enum lsm_attr_type { + lsm_attr_type_entry_type = 1 << 0 +} lsm_attr_type; + +/** + * A single attribute associated with an entry + */ +typedef struct lsm_attr { + lsm_attr_type type; + lsm_string str; +} lsm_attr; + +/** + * Represents a collection of attributes for an entry. A collection can only + * contain one of each attribute. + */ +typedef struct lsm_attr_list { + uint64_t count; + lsm_attr *items; + uint32_t bitmap; +} lsm_attr_list; + +/** + * An entry inside an LSM store + */ +typedef struct lsm_entry { + lsm_string key; + lsm_attr_list attrs; + lsm_string data; +} lsm_entry; + +/** + * A store of entries, which manages its data both in-memory and on disk. + */ +typedef struct lsm_store lsm_store; + +#endif diff --git a/lsm/src/lsm_store.c b/lsm/src/lsm_store.c new file mode 100644 index 0000000..e167836 --- /dev/null +++ b/lsm/src/lsm_store.c @@ -0,0 +1,22 @@ +#include <stdlib.h> + +#include "lsm.h" +#include "lsm_store.h" + +/** + * Initialize a new lsm_store struct.
+ * + * @param ptr pointer to where to store the newly allocated object's pointer + * @return lsm_error_ok on success, lsm_error_failed_alloc if allocation fails + */ +lsm_error lsm_store_init(lsm_store **ptr) { + lsm_store *store = calloc(1, sizeof(lsm_store)); + + if (store == NULL) { + return lsm_error_failed_alloc; + } + + *ptr = store; + + return lsm_error_ok; +} diff --git a/lsm/src/lsm_store.h b/lsm/src/lsm_store.h new file mode 100644 index 0000000..c73b2e9 --- /dev/null +++ b/lsm/src/lsm_store.h @@ -0,0 +1,12 @@ +#ifndef LSM_STORE_INTERNAL +#define LSM_STORE_INTERNAL + +#include "lsm.h" +#include "lsm_store_node.h" + +struct lsm_store { + lsm_store_node *root; + uint64_t size; +}; + +#endif diff --git a/lsm/src/lsm_store_node.c b/lsm/src/lsm_store_node.c new file mode 100644 index 0000000..63bc238 --- /dev/null +++ b/lsm/src/lsm_store_node.c @@ -0,0 +1,62 @@ +#include <stdlib.h> + +#include "lsm_store_node.h" +#include "lsm.h" + +lsm_error lsm_store_inode_init(lsm_store_inode **ptr, const char c) { + lsm_store_inode *node = calloc(1, sizeof(lsm_store_inode)); + + if (node == NULL) { + return lsm_error_failed_alloc; + } + + node->key = c; + *ptr = node; + + return lsm_error_ok; +} + +lsm_error lsm_store_node_init(lsm_store_node **ptr) { + lsm_store_node *node = calloc(1, sizeof(lsm_store_node)); + + if (node == NULL) { + return lsm_error_failed_alloc; + } + + *ptr = node; + + return lsm_error_ok; +} + +lsm_error lsm_store_node_search(lsm_store_node **out_ptr, lsm_store_node *node, const char c) { + if (node->size == 0) { + return lsm_error_not_found; + } + + lsm_store_inode *parent = node->root; + lsm_store_inode *child; + lsm_store_node *out = NULL; + + while (1) { + if (parent->key == c) { + out = parent->next; + break; + } + + child = (c < parent->key) ?
parent->left : parent->right; + + if (child == NULL) { + break; + } + + parent = child; + }; + + if (out == NULL) { + return lsm_error_not_found; + } + + *out_ptr = out; + + return lsm_error_ok; +} diff --git a/lsm/src/lsm_store_node.h b/lsm/src/lsm_store_node.h new file mode 100644 index 0000000..7fbcff3 --- /dev/null +++ b/lsm/src/lsm_store_node.h @@ -0,0 +1,42 @@ +#ifndef LSM_STORE_NODE_INTERNAL +#define LSM_STORE_NODE_INTERNAL + +#include "lsm.h" + +/** + * A node inside a store node's internal binary tree. + */ +typedef struct lsm_store_inode { + struct lsm_store_inode *left; + struct lsm_store_inode *right; + struct lsm_store_node *next; + char key; +} lsm_store_inode; + +/** + * Initialize a new lsm_store_inode. + */ +lsm_error lsm_store_inode_init(lsm_store_inode **ptr, const char c); + +/** + * A node inside the store's trie structure. Internally, each node manages a + * binary tree. + */ +typedef struct lsm_store_node { + lsm_entry *entry; + lsm_store_inode *root; + uint8_t size; + lsm_string skip; +} lsm_store_node; + +/** + * Initialize a new lsm_store_node. + */ +lsm_error lsm_store_node_init(lsm_store_node **out); + +/** + * Search for the next node following the given character, if present. 
+ */ +lsm_error lsm_store_node_search(lsm_store_node **out, lsm_store_node *node, const char c); + +#endif diff --git a/src/main.c b/src/main.c index fa9d95e..f32f83f 100644 --- a/src/main.c +++ b/src/main.c @@ -27,7 +27,7 @@ int main() { int port = atoi(port_str); - if (port <= 0 || port >= 2 << 16) { + if (port <= 0 || port >= 1 << 16) { critical(1, "Invalid TCP port %s", port_str); } From 13e42181a2ef07137fd227496014daa721baf301 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Fri, 13 Oct 2023 10:29:00 +0200 Subject: [PATCH 02/70] feat(lsm): implement bt insert & search --- lsm/include/lsm.h | 3 +- lsm/src/bt/lsm_bt.c | 70 ++ lsm/src/bt/lsm_bt.h | 78 ++ lsm/test/bt/bt.c | 67 ++ lsm/test/test.h | 1839 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 2056 insertions(+), 1 deletion(-) create mode 100644 lsm/src/bt/lsm_bt.c create mode 100644 lsm/src/bt/lsm_bt.h create mode 100644 lsm/test/bt/bt.c create mode 100644 lsm/test/test.h diff --git a/lsm/include/lsm.h b/lsm/include/lsm.h index 6eb7fd0..b216d71 100644 --- a/lsm/include/lsm.h +++ b/lsm/include/lsm.h @@ -8,7 +8,8 @@ typedef enum lsm_error { lsm_error_ok = 0, lsm_error_failed_alloc = 1, - lsm_error_not_found = 2 + lsm_error_not_found = 2, + lsm_error_already_present = 3 } lsm_error; /** diff --git a/lsm/src/bt/lsm_bt.c b/lsm/src/bt/lsm_bt.c new file mode 100644 index 0000000..6b7b7bf --- /dev/null +++ b/lsm/src/bt/lsm_bt.c @@ -0,0 +1,70 @@ +#include <stdlib.h> + +#include "lsm.h" +#include "lsm_bt.h" + +lsm_error lsm_bt_node_init(lsm_bt_node **ptr, const char key, void *data) { + lsm_bt_node *node = calloc(1, sizeof(lsm_bt_node)); + + if (node == NULL) { + return lsm_error_failed_alloc; + } + + node->key = key; + node->data = data; + *ptr = node; + + return lsm_error_ok; +} + +lsm_error lsm_bt_init(lsm_bt **ptr) { + lsm_bt *bt = calloc(1, sizeof(lsm_bt)); + + if (bt == NULL) { + return lsm_error_failed_alloc; + } + + *ptr = bt; + + return lsm_error_ok; +} + +lsm_error lsm_bt_insert(lsm_bt *bt, char key,
void *data) { + lsm_bt_node **dest = &bt->root; + + // Traverse down the tree until we reach the new point to insert our node + while (*dest != NULL) { + if ((*dest)->key == key) { + return lsm_error_already_present; + } + + dest = key < (*dest)->key ? &(*dest)->left : &(*dest)->right; + } + + lsm_bt_node *node; + + if (lsm_bt_node_init(&node, key, data) != lsm_error_ok) { + return lsm_error_failed_alloc; + } + + *dest = node; + bt->size++; + + return lsm_error_ok; +} + +lsm_error lsm_bt_search(void **out, lsm_bt *bt, char key) { + lsm_bt_node *node = bt->root; + + while (node != NULL) { + if (node->key == key) { + *out = node->data; + + return lsm_error_ok; + } + + node = key < node->key ? node->left : node->right; + } + + return lsm_error_not_found; +} diff --git a/lsm/src/bt/lsm_bt.h b/lsm/src/bt/lsm_bt.h new file mode 100644 index 0000000..a886f68 --- /dev/null +++ b/lsm/src/bt/lsm_bt.h @@ -0,0 +1,78 @@ +#ifndef LSM_BT_INTERNAL +#define LSM_BT_INTERNAL + +#include <stdint.h> + +#include "lsm.h" + +/** + * Node inside a binary tree + */ +typedef struct lsm_bt_node { + struct lsm_bt_node *left; + struct lsm_bt_node *right; + void *data; + char key; +} lsm_bt_node; + +/** + * Initialize a new binary tree node + * + * @param ptr where to store newly allocated pointer + * @param key key for the node + * @param data data to store + */ +lsm_error lsm_bt_node_init(lsm_bt_node **ptr, const char key, void *data); + +/** + * Deallocate a single binary tree node + */ +void lsm_bt_node_free(lsm_bt_node *node); + +/** + * A binary tree implementation using char values as keys + */ +typedef struct lsm_bt { + lsm_bt_node *root; + uint8_t size; +} lsm_bt; + +/** + * Initialize a new binary tree + * + * @param ptr where to store newly allocated pointer + */ +lsm_error lsm_bt_init(lsm_bt **ptr); + +/** + * Deallocate an entire binary tree, including all its nodes + */ +void lsm_bt_free(lsm_bt *bt); + +/** + * Search for the data stored behind the given key.
+ * + * @param out pointer to store data pointer in + * @param bt binary tree to search + * @param key key to search + */ +lsm_error lsm_bt_search(void **out, lsm_bt *bt, char key); + +/** + * Insert a new data value into the tree with the given key. + * + * @param bt binary tree to insert into + * @param key key to insert + * @param data data to store + */ +lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data); + +/** + * Remove the given key from the binary tree. + * + * @param bt binary tree to remove from + * @param key key to remove + */ +lsm_error lsm_bt_remove(lsm_bt *bt, char key); + +#endif diff --git a/lsm/test/bt/bt.c b/lsm/test/bt/bt.c new file mode 100644 index 0000000..152a6bc --- /dev/null +++ b/lsm/test/bt/bt.c @@ -0,0 +1,67 @@ +#include "test.h" +#include "lsm.h" +#include "lsm_bt.h" + +#define BT_INIT() \ + lsm_bt *bt; \ + TEST_CHECK(lsm_bt_init(&bt) == lsm_error_ok); \ + TEST_CHECK(bt != NULL) + +void test_init() { + BT_INIT(); +} + +void test_insert_first() { + BT_INIT(); + + TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_ok); + TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_already_present); + + void *data; + TEST_CHECK(lsm_bt_search(&data, bt, 'a') == lsm_error_ok); + TEST_CHECK(data == (void *)1); + + TEST_CHECK(lsm_bt_search(&data, bt, 'b') == lsm_error_not_found); +} + +void test_insert_two() { + BT_INIT(); + + TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_ok); + TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_already_present); + TEST_CHECK(lsm_bt_insert(bt, 'b', (void *)2) == lsm_error_ok); + TEST_CHECK(lsm_bt_insert(bt, 'b', (void *)2) == lsm_error_already_present); + + void *data; + TEST_CHECK(lsm_bt_search(&data, bt, 'a') == lsm_error_ok); + TEST_CHECK(data == (void *)1); + TEST_CHECK(lsm_bt_search(&data, bt, 'b') == lsm_error_ok); + TEST_CHECK(data == (void *)2); + TEST_CHECK(lsm_bt_search(&data, bt, 'c') == lsm_error_not_found); +} + +void test_insert_multiple() { + char chars[] = 
"falcoep"; + size_t char_count = sizeof(chars) / sizeof(char); + + BT_INIT(); + + for (size_t i = 0; i < char_count; i++) { + TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_ok); + } + + void *data; + for (size_t i = 0; i < char_count; i++) { + TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_already_present); + TEST_CHECK(lsm_bt_search(&data, bt, chars[i]) == lsm_error_ok); + TEST_CHECK(data == (void *)(i + 1)); + } +} + +TEST_LIST = { + { "test init", test_init }, + { "test insert first", test_insert_first }, + { "test insert two", test_insert_two }, + { "test insert multiple", test_insert_multiple }, + { NULL, NULL } +}; diff --git a/lsm/test/test.h b/lsm/test/test.h new file mode 100644 index 0000000..9ab8f88 --- /dev/null +++ b/lsm/test/test.h @@ -0,0 +1,1839 @@ +/* + * Acutest -- Another C/C++ Unit Test facility + * + * + * Copyright 2013-2020 Martin Mitas + * Copyright 2019 Garrett D'Amore + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef ACUTEST_H +#define ACUTEST_H + + +/************************ + *** Public interface *** + ************************/ + +/* By default, "acutest.h" provides the main program entry point (function + * main()). However, if the test suite is composed of multiple source files + * which include "acutest.h", then this causes a problem of multiple main() + * definitions. To avoid this problem, #define macro TEST_NO_MAIN in all + * compilation units but one. + */ + +/* Macro to specify list of unit tests in the suite. + * The unit test implementation MUST provide list of unit tests it implements + * with this macro: + * + * TEST_LIST = { + * { "test1_name", test1_func_ptr }, + * { "test2_name", test2_func_ptr }, + * ... + * { NULL, NULL } // zeroed record marking the end of the list + * }; + * + * The list specifies names of each test (must be unique) and pointer to + * a function implementing it. The function does not take any arguments + * and has no return values, i.e. every test function has to be compatible + * with this prototype: + * + * void test_func(void); + * + * Note the list has to be ended with a zeroed record. + */ +#define TEST_LIST const struct acutest_test_ acutest_list_[] + + +/* Macros for testing whether an unit test succeeds or fails. These macros + * can be used arbitrarily in functions implementing the unit tests. + * + * If any condition fails throughout execution of a test, the test fails. + * + * TEST_CHECK takes only one argument (the condition), TEST_CHECK_ allows + * also to specify an error message to print out if the condition fails. + * (It expects printf-like format string and its parameters). 
The macros + * return non-zero (condition passes) or 0 (condition fails). + * + * That can be useful when more conditions should be checked only if some + * preceding condition passes, as illustrated in this code snippet: + * + * SomeStruct* ptr = allocate_some_struct(); + * if(TEST_CHECK(ptr != NULL)) { + * TEST_CHECK(ptr->member1 < 100); + * TEST_CHECK(ptr->member2 > 200); + * } + */ +#define TEST_CHECK_(cond,...) acutest_check_((cond), __FILE__, __LINE__, __VA_ARGS__) +#define TEST_CHECK(cond) acutest_check_((cond), __FILE__, __LINE__, "%s", #cond) + + +/* These macros are the same as TEST_CHECK_ and TEST_CHECK except that if the + * condition fails, the currently executed unit test is immediately aborted. + * + * That is done either by calling abort() if the unit test is executed as a + * child process; or via longjmp() if the unit test is executed within the + * main Acutest process. + * + * As a side effect of such abortion, your unit tests may cause memory leaks, + * unflushed file descriptors, and other phenomena caused by the abortion. + * + * Therefore you should not use these as a general replacement for TEST_CHECK. + * Use it with some caution, especially if your test causes some other side + * effects to the outside world (e.g. communicating with some server, inserting + * into a database etc.). + */ +#define TEST_ASSERT_(cond,...) \ + do { \ + if(!acutest_check_((cond), __FILE__, __LINE__, __VA_ARGS__)) \ + acutest_abort_(); \ + } while(0) +#define TEST_ASSERT(cond) \ + do { \ + if(!acutest_check_((cond), __FILE__, __LINE__, "%s", #cond)) \ + acutest_abort_(); \ + } while(0) + + +#ifdef __cplusplus +/* Macros to verify that the code (the 1st argument) throws exception of given + * type (the 2nd argument). (Note these macros are only available in C++.) + * + * TEST_EXCEPTION_ is like TEST_EXCEPTION but accepts custom printf-like + * message. 
+ * + * For example: + * + * TEST_EXCEPTION(function_that_throw(), ExpectedExceptionType); + * + * If the function_that_throw() throws ExpectedExceptionType, the check passes. + * If the function throws anything incompatible with ExpectedExceptionType + * (or if it does not thrown an exception at all), the check fails. + */ +#define TEST_EXCEPTION(code, exctype) \ + do { \ + bool exc_ok_ = false; \ + const char *msg_ = NULL; \ + try { \ + code; \ + msg_ = "No exception thrown."; \ + } catch(exctype const&) { \ + exc_ok_= true; \ + } catch(...) { \ + msg_ = "Unexpected exception thrown."; \ + } \ + acutest_check_(exc_ok_, __FILE__, __LINE__, #code " throws " #exctype);\ + if(msg_ != NULL) \ + acutest_message_("%s", msg_); \ + } while(0) +#define TEST_EXCEPTION_(code, exctype, ...) \ + do { \ + bool exc_ok_ = false; \ + const char *msg_ = NULL; \ + try { \ + code; \ + msg_ = "No exception thrown."; \ + } catch(exctype const&) { \ + exc_ok_= true; \ + } catch(...) { \ + msg_ = "Unexpected exception thrown."; \ + } \ + acutest_check_(exc_ok_, __FILE__, __LINE__, __VA_ARGS__); \ + if(msg_ != NULL) \ + acutest_message_("%s", msg_); \ + } while(0) +#endif /* #ifdef __cplusplus */ + + +/* Sometimes it is useful to split execution of more complex unit tests to some + * smaller parts and associate those parts with some names. + * + * This is especially handy if the given unit test is implemented as a loop + * over some vector of multiple testing inputs. Using these macros allow to use + * sort of subtitle for each iteration of the loop (e.g. outputting the input + * itself or a name associated to it), so that if any TEST_CHECK condition + * fails in the loop, it can be easily seen which iteration triggers the + * failure, without the need to manually output the iteration-specific data in + * every single TEST_CHECK inside the loop body. 
+ * + * TEST_CASE allows to specify only single string as the name of the case, + * TEST_CASE_ provides all the power of printf-like string formatting. + * + * Note that the test cases cannot be nested. Starting a new test case ends + * implicitly the previous one. To end the test case explicitly (e.g. to end + * the last test case after exiting the loop), you may use TEST_CASE(NULL). + */ +#define TEST_CASE_(...) acutest_case_(__VA_ARGS__) +#define TEST_CASE(name) acutest_case_("%s", name) + + +/* Maximal output per TEST_CASE call. Longer messages are cut. + * You may define another limit prior including "acutest.h" + */ +#ifndef TEST_CASE_MAXSIZE +#define TEST_CASE_MAXSIZE 64 +#endif + + +/* printf-like macro for outputting an extra information about a failure. + * + * Intended use is to output some computed output versus the expected value, + * e.g. like this: + * + * if(!TEST_CHECK(produced == expected)) { + * TEST_MSG("Expected: %d", expected); + * TEST_MSG("Produced: %d", produced); + * } + * + * Note the message is only written down if the most recent use of any checking + * macro (like e.g. TEST_CHECK or TEST_EXCEPTION) in the current test failed. + * This means the above is equivalent to just this: + * + * TEST_CHECK(produced == expected); + * TEST_MSG("Expected: %d", expected); + * TEST_MSG("Produced: %d", produced); + * + * The macro can deal with multi-line output fairly well. It also automatically + * adds a final new-line if there is none present. + */ +#define TEST_MSG(...) acutest_message_(__VA_ARGS__) + + +/* Maximal output per TEST_MSG call. Longer messages are cut. + * You may define another limit prior including "acutest.h" + */ +#ifndef TEST_MSG_MAXSIZE +#define TEST_MSG_MAXSIZE 1024 +#endif + + +/* Macro for dumping a block of memory. 
+ * + * Its intended use is very similar to what TEST_MSG is for, but instead of + * generating any printf-like message, this is for dumping a raw block of + * memory in a hexadecimal form: + * + * TEST_CHECK(size_produced == size_expected && + * memcmp(addr_produced, addr_expected, size_produced) == 0); + * TEST_DUMP("Expected:", addr_expected, size_expected); + * TEST_DUMP("Produced:", addr_produced, size_produced); + */ +#define TEST_DUMP(title, addr, size) acutest_dump_(title, addr, size) + +/* Maximal output per TEST_DUMP call (in bytes to dump). Longer blocks are cut. + * You may define another limit prior to including "acutest.h" + */ +#ifndef TEST_DUMP_MAXSIZE +#define TEST_DUMP_MAXSIZE 1024 +#endif + + +/* Common test initialization/clean-up + * + * In some test suites, it may be needed to perform some sort of the same + * initialization and/or clean-up in all the tests. + * + * Such test suites may use macros TEST_INIT and/or TEST_FINI prior to including + * this header. The expansion of the macro is then used as a body of helper + * function called just before executing every single test (TEST_INIT) or just + * after it ends (TEST_FINI). + * + * Examples of various ways how to use the macro TEST_INIT: + * + * #define TEST_INIT my_init_func(); + * #define TEST_INIT my_init_func() // Works even without the semicolon + * #define TEST_INIT setlocale(LC_ALL, NULL); + * #define TEST_INIT { setlocale(LC_ALL, NULL); my_init_func(); } + * + * TEST_FINI is to be used in the same way. + */ + + +/********************** + *** Implementation *** + **********************/ + +/* The unit test files should not rely on anything below.
*/ + +#include <ctype.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <setjmp.h> + +#if defined(unix) || defined(__unix__) || defined(__unix) || defined(__APPLE__) +#define ACUTEST_UNIX_ 1 +#include <errno.h> +#include <libgen.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <signal.h> +#include <time.h> + +#if defined CLOCK_PROCESS_CPUTIME_ID && defined CLOCK_MONOTONIC +#define ACUTEST_HAS_POSIX_TIMER_ 1 +#endif +#endif + +#if defined(_gnu_linux_) || defined(__linux__) +#define ACUTEST_LINUX_ 1 +#include <fcntl.h> +#include <sys/stat.h> +#endif + +#if defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__) +#define ACUTEST_WIN_ 1 + #include <windows.h> + #include <io.h> +#endif + +#if defined(__APPLE__) +#define ACUTEST_MACOS_ + #include <assert.h> + #include <stdbool.h> + #include <sys/types.h> + #include <unistd.h> + #include <sys/sysctl.h> +#endif + +#ifdef __cplusplus +#include <exception> +#endif + +#ifdef __has_include +#if __has_include(<valgrind.h>) +#include <valgrind.h> +#endif +#endif + +/* Enable the use of the non-standard keyword __attribute__ to silence warnings under some compilers */ +#if defined(__GNUC__) || defined(__clang__) +#define ACUTEST_ATTRIBUTE_(attr) __attribute__((attr)) +#else +#define ACUTEST_ATTRIBUTE_(attr) +#endif + +/* Note our global private identifiers end with '_' to mitigate risk of clash + * with the unit tests implementation. */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _MSC_VER +/* In the multi-platform code like ours, we cannot use the non-standard + * "safe" functions from Microsoft C lib like e.g. sprintf_s() instead of + * standard sprintf(). Hence, let's disable the warning C4996.
*/ + #pragma warning(push) + #pragma warning(disable: 4996) +#endif + + +struct acutest_test_ { + const char* name; + void (*func)(void); +}; + +struct acutest_test_data_ { + unsigned char flags; + double duration; +}; + +enum { + ACUTEST_FLAG_RUN_ = 1 << 0, + ACUTEST_FLAG_SUCCESS_ = 1 << 1, + ACUTEST_FLAG_FAILURE_ = 1 << 2, +}; + +extern const struct acutest_test_ acutest_list_[]; + +int acutest_check_(int cond, const char* file, int line, const char* fmt, ...); +void acutest_case_(const char* fmt, ...); +void acutest_message_(const char* fmt, ...); +void acutest_dump_(const char* title, const void* addr, size_t size); +void acutest_abort_(void) ACUTEST_ATTRIBUTE_(noreturn); + + +#ifndef TEST_NO_MAIN + +static char* acutest_argv0_ = NULL; +static size_t acutest_list_size_ = 0; +static struct acutest_test_data_* acutest_test_data_ = NULL; +static size_t acutest_count_ = 0; +static int acutest_no_exec_ = -1; +static int acutest_no_summary_ = 0; +static int acutest_tap_ = 0; +static int acutest_skip_mode_ = 0; +static int acutest_worker_ = 0; +static int acutest_worker_index_ = 0; +static int acutest_cond_failed_ = 0; +static int acutest_was_aborted_ = 0; +static FILE *acutest_xml_output_ = NULL; + +static int acutest_stat_failed_units_ = 0; +static int acutest_stat_run_units_ = 0; + +static const struct acutest_test_* acutest_current_test_ = NULL; +static int acutest_current_index_ = 0; +static char acutest_case_name_[TEST_CASE_MAXSIZE] = ""; +static int acutest_test_already_logged_ = 0; +static int acutest_case_already_logged_ = 0; +static int acutest_verbose_level_ = 2; +static int acutest_test_failures_ = 0; +static int acutest_colorize_ = 0; +static int acutest_timer_ = 0; + +static int acutest_abort_has_jmp_buf_ = 0; +static jmp_buf acutest_abort_jmp_buf_; + + +static void +acutest_cleanup_(void) +{ + free((void*) acutest_test_data_); +} + +static void ACUTEST_ATTRIBUTE_(noreturn) +acutest_exit_(int exit_code) +{ + acutest_cleanup_(); + exit(exit_code); +} + 
+#if defined ACUTEST_WIN_ +typedef LARGE_INTEGER acutest_timer_type_; + static LARGE_INTEGER acutest_timer_freq_; + static acutest_timer_type_ acutest_timer_start_; + static acutest_timer_type_ acutest_timer_end_; + + static void + acutest_timer_init_(void) + { + QueryPerformanceFrequency(&acutest_timer_freq_); + } + + static void + acutest_timer_get_time_(LARGE_INTEGER* ts) + { + QueryPerformanceCounter(ts); + } + + static double + acutest_timer_diff_(LARGE_INTEGER start, LARGE_INTEGER end) + { + double duration = (double)(end.QuadPart - start.QuadPart); + duration /= (double)acutest_timer_freq_.QuadPart; + return duration; + } + + static void + acutest_timer_print_diff_(void) + { + printf("%.6lf secs", acutest_timer_diff_(acutest_timer_start_, acutest_timer_end_)); + } +#elif defined ACUTEST_HAS_POSIX_TIMER_ +static clockid_t acutest_timer_id_; +typedef struct timespec acutest_timer_type_; +static acutest_timer_type_ acutest_timer_start_; +static acutest_timer_type_ acutest_timer_end_; + +static void +acutest_timer_init_(void) +{ + if(acutest_timer_ == 1) + acutest_timer_id_ = CLOCK_MONOTONIC; + else if(acutest_timer_ == 2) + acutest_timer_id_ = CLOCK_PROCESS_CPUTIME_ID; +} + +static void +acutest_timer_get_time_(struct timespec* ts) +{ + clock_gettime(acutest_timer_id_, ts); +} + +static double +acutest_timer_diff_(struct timespec start, struct timespec end) +{ + double endns; + double startns; + + endns = end.tv_sec; + endns *= 1e9; + endns += end.tv_nsec; + + startns = start.tv_sec; + startns *= 1e9; + startns += start.tv_nsec; + + return ((endns - startns)/ 1e9); +} + +static void +acutest_timer_print_diff_(void) +{ + printf("%.6lf secs", + acutest_timer_diff_(acutest_timer_start_, acutest_timer_end_)); +} +#else +typedef int acutest_timer_type_; + static acutest_timer_type_ acutest_timer_start_; + static acutest_timer_type_ acutest_timer_end_; + + void + acutest_timer_init_(void) + {} + + static void + acutest_timer_get_time_(int* ts) + { + (void) ts; + } + +
static double + acutest_timer_diff_(int start, int end) + { + (void) start; + (void) end; + return 0.0; + } + + static void + acutest_timer_print_diff_(void) + {} +#endif + +#define ACUTEST_COLOR_DEFAULT_ 0 +#define ACUTEST_COLOR_GREEN_ 1 +#define ACUTEST_COLOR_RED_ 2 +#define ACUTEST_COLOR_DEFAULT_INTENSIVE_ 3 +#define ACUTEST_COLOR_GREEN_INTENSIVE_ 4 +#define ACUTEST_COLOR_RED_INTENSIVE_ 5 + +static int ACUTEST_ATTRIBUTE_(format (printf, 2, 3)) +acutest_colored_printf_(int color, const char* fmt, ...) +{ + va_list args; + char buffer[256]; + int n; + + va_start(args, fmt); + vsnprintf(buffer, sizeof(buffer), fmt, args); + va_end(args); + buffer[sizeof(buffer)-1] = '\0'; + + if(!acutest_colorize_) { + return printf("%s", buffer); + } + +#if defined ACUTEST_UNIX_ + { + const char* col_str; + switch(color) { + case ACUTEST_COLOR_GREEN_: col_str = "\033[0;32m"; break; + case ACUTEST_COLOR_RED_: col_str = "\033[0;31m"; break; + case ACUTEST_COLOR_GREEN_INTENSIVE_: col_str = "\033[1;32m"; break; + case ACUTEST_COLOR_RED_INTENSIVE_: col_str = "\033[1;31m"; break; + case ACUTEST_COLOR_DEFAULT_INTENSIVE_: col_str = "\033[1m"; break; + default: col_str = "\033[0m"; break; + } + printf("%s", col_str); + n = printf("%s", buffer); + printf("\033[0m"); + return n; + } +#elif defined ACUTEST_WIN_ + { + HANDLE h; + CONSOLE_SCREEN_BUFFER_INFO info; + WORD attr; + + h = GetStdHandle(STD_OUTPUT_HANDLE); + GetConsoleScreenBufferInfo(h, &info); + + switch(color) { + case ACUTEST_COLOR_GREEN_: attr = FOREGROUND_GREEN; break; + case ACUTEST_COLOR_RED_: attr = FOREGROUND_RED; break; + case ACUTEST_COLOR_GREEN_INTENSIVE_: attr = FOREGROUND_GREEN | FOREGROUND_INTENSITY; break; + case ACUTEST_COLOR_RED_INTENSIVE_: attr = FOREGROUND_RED | FOREGROUND_INTENSITY; break; + case ACUTEST_COLOR_DEFAULT_INTENSIVE_: attr = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_INTENSITY; break; + default: attr = 0; break; + } + if(attr != 0) + SetConsoleTextAttribute(h, attr); + n = 
printf("%s", buffer); + SetConsoleTextAttribute(h, info.wAttributes); + return n; + } +#else + n = printf("%s", buffer); + return n; +#endif +} + +static void +acutest_begin_test_line_(const struct acutest_test_* test) +{ + if(!acutest_tap_) { + if(acutest_verbose_level_ >= 3) { + acutest_colored_printf_(ACUTEST_COLOR_DEFAULT_INTENSIVE_, "Test %s:\n", test->name); + acutest_test_already_logged_++; + } else if(acutest_verbose_level_ >= 1) { + int n; + char spaces[48]; + + n = acutest_colored_printf_(ACUTEST_COLOR_DEFAULT_INTENSIVE_, "Test %s... ", test->name); + memset(spaces, ' ', sizeof(spaces)); + if(n < (int) sizeof(spaces)) + printf("%.*s", (int) sizeof(spaces) - n, spaces); + } else { + acutest_test_already_logged_ = 1; + } + } +} + +static void +acutest_finish_test_line_(int result) +{ + if(acutest_tap_) { + const char* str = (result == 0) ? "ok" : "not ok"; + + printf("%s %d - %s\n", str, acutest_current_index_ + 1, acutest_current_test_->name); + + if(result == 0 && acutest_timer_) { + printf("# Duration: "); + acutest_timer_print_diff_(); + printf("\n"); + } + } else { + int color = (result == 0) ? ACUTEST_COLOR_GREEN_INTENSIVE_ : ACUTEST_COLOR_RED_INTENSIVE_; + const char* str = (result == 0) ? "OK" : "FAILED"; + printf("[ "); + acutest_colored_printf_(color, "%s", str); + printf(" ]"); + + if(result == 0 && acutest_timer_) { + printf(" "); + acutest_timer_print_diff_(); + } + + printf("\n"); + } +} + +static void +acutest_line_indent_(int level) +{ + static const char spaces[] = " "; + int n = level * 2; + + if(acutest_tap_ && n > 0) { + n--; + printf("#"); + } + + while(n > 16) { + printf("%s", spaces); + n -= 16; + } + printf("%.*s", n, spaces); +} + +int ACUTEST_ATTRIBUTE_(format (printf, 4, 5)) +acutest_check_(int cond, const char* file, int line, const char* fmt, ...) 
+{ + const char *result_str; + int result_color; + int verbose_level; + + if(cond) { + result_str = "ok"; + result_color = ACUTEST_COLOR_GREEN_; + verbose_level = 3; + } else { + if(!acutest_test_already_logged_ && acutest_current_test_ != NULL) + acutest_finish_test_line_(-1); + + result_str = "failed"; + result_color = ACUTEST_COLOR_RED_; + verbose_level = 2; + acutest_test_failures_++; + acutest_test_already_logged_++; + } + + if(acutest_verbose_level_ >= verbose_level) { + va_list args; + + if(!acutest_case_already_logged_ && acutest_case_name_[0]) { + acutest_line_indent_(1); + acutest_colored_printf_(ACUTEST_COLOR_DEFAULT_INTENSIVE_, "Case %s:\n", acutest_case_name_); + acutest_test_already_logged_++; + acutest_case_already_logged_++; + } + + acutest_line_indent_(acutest_case_name_[0] ? 2 : 1); + if(file != NULL) { +#ifdef ACUTEST_WIN_ + const char* lastsep1 = strrchr(file, '\\'); + const char* lastsep2 = strrchr(file, '/'); + if(lastsep1 == NULL) + lastsep1 = file-1; + if(lastsep2 == NULL) + lastsep2 = file-1; + file = (lastsep1 > lastsep2 ? lastsep1 : lastsep2) + 1; +#else + const char* lastsep = strrchr(file, '/'); + if(lastsep != NULL) + file = lastsep+1; +#endif + printf("%s:%d: Check ", file, line); + } + + va_start(args, fmt); + vprintf(fmt, args); + va_end(args); + + printf("... "); + acutest_colored_printf_(result_color, "%s", result_str); + printf("\n"); + acutest_test_already_logged_++; + } + + acutest_cond_failed_ = (cond == 0); + return !acutest_cond_failed_; +} + +void ACUTEST_ATTRIBUTE_(format (printf, 1, 2)) +acutest_case_(const char* fmt, ...) 
+{ + va_list args; + + if(acutest_verbose_level_ < 2) + return; + + if(acutest_case_name_[0]) { + acutest_case_already_logged_ = 0; + acutest_case_name_[0] = '\0'; + } + + if(fmt == NULL) + return; + + va_start(args, fmt); + vsnprintf(acutest_case_name_, sizeof(acutest_case_name_) - 1, fmt, args); + va_end(args); + acutest_case_name_[sizeof(acutest_case_name_) - 1] = '\0'; + + if(acutest_verbose_level_ >= 3) { + acutest_line_indent_(1); + acutest_colored_printf_(ACUTEST_COLOR_DEFAULT_INTENSIVE_, "Case %s:\n", acutest_case_name_); + acutest_test_already_logged_++; + acutest_case_already_logged_++; + } +} + +void ACUTEST_ATTRIBUTE_(format (printf, 1, 2)) +acutest_message_(const char* fmt, ...) +{ + char buffer[TEST_MSG_MAXSIZE]; + char* line_beg; + char* line_end; + va_list args; + + if(acutest_verbose_level_ < 2) + return; + + /* We allow extra message only when something is already wrong in the + * current test. */ + if(acutest_current_test_ == NULL || !acutest_cond_failed_) + return; + + va_start(args, fmt); + vsnprintf(buffer, TEST_MSG_MAXSIZE, fmt, args); + va_end(args); + buffer[TEST_MSG_MAXSIZE-1] = '\0'; + + line_beg = buffer; + while(1) { + line_end = strchr(line_beg, '\n'); + if(line_end == NULL) + break; + acutest_line_indent_(acutest_case_name_[0] ? 3 : 2); + printf("%.*s\n", (int)(line_end - line_beg), line_beg); + line_beg = line_end + 1; + } + if(line_beg[0] != '\0') { + acutest_line_indent_(acutest_case_name_[0] ? 3 : 2); + printf("%s\n", line_beg); + } +} + +void +acutest_dump_(const char* title, const void* addr, size_t size) +{ + static const size_t BYTES_PER_LINE = 16; + size_t line_beg; + size_t truncate = 0; + + if(acutest_verbose_level_ < 2) + return; + + /* We allow extra message only when something is already wrong in the + * current test. 
*/ + if(acutest_current_test_ == NULL || !acutest_cond_failed_) + return; + + if(size > TEST_DUMP_MAXSIZE) { + truncate = size - TEST_DUMP_MAXSIZE; + size = TEST_DUMP_MAXSIZE; + } + + acutest_line_indent_(acutest_case_name_[0] ? 3 : 2); + printf((title[strlen(title)-1] == ':') ? "%s\n" : "%s:\n", title); + + for(line_beg = 0; line_beg < size; line_beg += BYTES_PER_LINE) { + size_t line_end = line_beg + BYTES_PER_LINE; + size_t off; + + acutest_line_indent_(acutest_case_name_[0] ? 4 : 3); + printf("%08lx: ", (unsigned long)line_beg); + for(off = line_beg; off < line_end; off++) { + if(off < size) + printf(" %02x", ((const unsigned char*)addr)[off]); + else + printf(" "); + } + + printf(" "); + for(off = line_beg; off < line_end; off++) { + unsigned char byte = ((const unsigned char*)addr)[off]; + if(off < size) + printf("%c", (iscntrl(byte) ? '.' : byte)); + else + break; + } + + printf("\n"); + } + + if(truncate > 0) { + acutest_line_indent_(acutest_case_name_[0] ? 4 : 3); + printf(" ... (and more %u bytes)\n", (unsigned) truncate); + } +} + +/* This is called just before each test */ +static void +acutest_init_(const char *test_name) +{ +#ifdef TEST_INIT + TEST_INIT + ; /* Allow for a single unterminated function call */ +#endif + + /* Suppress any warnings about unused variable. */ + (void) test_name; +} + +/* This is called after each test */ +static void +acutest_fini_(const char *test_name) +{ +#ifdef TEST_FINI + TEST_FINI + ; /* Allow for a single unterminated function call */ +#endif + + /* Suppress any warnings about unused variable. 
*/ + (void) test_name; +} + +void +acutest_abort_(void) +{ + if(acutest_abort_has_jmp_buf_) { + longjmp(acutest_abort_jmp_buf_, 1); + } else { + if(acutest_current_test_ != NULL) + acutest_fini_(acutest_current_test_->name); + abort(); + } +} + +static void +acutest_list_names_(void) +{ + const struct acutest_test_* test; + + printf("Unit tests:\n"); + for(test = &acutest_list_[0]; test->func != NULL; test++) + printf(" %s\n", test->name); +} + +static void +acutest_remember_(int i) +{ + if(acutest_test_data_[i].flags & ACUTEST_FLAG_RUN_) + return; + + acutest_test_data_[i].flags |= ACUTEST_FLAG_RUN_; + acutest_count_++; +} + +static void +acutest_set_success_(int i, int success) +{ + acutest_test_data_[i].flags |= success ? ACUTEST_FLAG_SUCCESS_ : ACUTEST_FLAG_FAILURE_; +} + +static void +acutest_set_duration_(int i, double duration) +{ + acutest_test_data_[i].duration = duration; +} + +static int +acutest_name_contains_word_(const char* name, const char* pattern) +{ + static const char word_delim[] = " \t-_/.,:;"; + const char* substr; + size_t pattern_len; + + pattern_len = strlen(pattern); + + substr = strstr(name, pattern); + while(substr != NULL) { + int starts_on_word_boundary = (substr == name || strchr(word_delim, substr[-1]) != NULL); + int ends_on_word_boundary = (substr[pattern_len] == '\0' || strchr(word_delim, substr[pattern_len]) != NULL); + + if(starts_on_word_boundary && ends_on_word_boundary) + return 1; + + substr = strstr(substr+1, pattern); + } + + return 0; +} + +static int +acutest_lookup_(const char* pattern) +{ + int i; + int n = 0; + + /* Try exact match. */ + for(i = 0; i < (int) acutest_list_size_; i++) { + if(strcmp(acutest_list_[i].name, pattern) == 0) { + acutest_remember_(i); + n++; + break; + } + } + if(n > 0) + return n; + + /* Try word match. 
*/ + for(i = 0; i < (int) acutest_list_size_; i++) { + if(acutest_name_contains_word_(acutest_list_[i].name, pattern)) { + acutest_remember_(i); + n++; + } + } + if(n > 0) + return n; + + /* Try relaxed match. */ + for(i = 0; i < (int) acutest_list_size_; i++) { + if(strstr(acutest_list_[i].name, pattern) != NULL) { + acutest_remember_(i); + n++; + } + } + + return n; +} + + +/* Called if anything goes bad in Acutest, or if the unit test ends in other + * way then by normal returning from its function (e.g. exception or some + * abnormal child process termination). */ +static void ACUTEST_ATTRIBUTE_(format (printf, 1, 2)) +acutest_error_(const char* fmt, ...) +{ + if(acutest_verbose_level_ == 0) + return; + + if(acutest_verbose_level_ >= 2) { + va_list args; + + acutest_line_indent_(1); + if(acutest_verbose_level_ >= 3) + acutest_colored_printf_(ACUTEST_COLOR_RED_INTENSIVE_, "ERROR: "); + va_start(args, fmt); + vprintf(fmt, args); + va_end(args); + printf("\n"); + } + + if(acutest_verbose_level_ >= 3) { + printf("\n"); + } +} + +/* Call directly the given test unit function. */ +static int +acutest_do_run_(const struct acutest_test_* test, int index) +{ + int status = -1; + + acutest_was_aborted_ = 0; + acutest_current_test_ = test; + acutest_current_index_ = index; + acutest_test_failures_ = 0; + acutest_test_already_logged_ = 0; + acutest_cond_failed_ = 0; + +#ifdef __cplusplus + try { +#endif + acutest_init_(test->name); + acutest_begin_test_line_(test); + + /* This is good to do in case the test unit crashes. 
*/ + fflush(stdout); + fflush(stderr); + + if(!acutest_worker_) { + acutest_abort_has_jmp_buf_ = 1; + if(setjmp(acutest_abort_jmp_buf_) != 0) { + acutest_was_aborted_ = 1; + goto aborted; + } + } + + acutest_timer_get_time_(&acutest_timer_start_); + test->func(); + aborted: + acutest_abort_has_jmp_buf_ = 0; + acutest_timer_get_time_(&acutest_timer_end_); + + if(acutest_verbose_level_ >= 3) { + acutest_line_indent_(1); + if(acutest_test_failures_ == 0) { + acutest_colored_printf_(ACUTEST_COLOR_GREEN_INTENSIVE_, "SUCCESS: "); + printf("All conditions have passed.\n"); + + if(acutest_timer_) { + acutest_line_indent_(1); + printf("Duration: "); + acutest_timer_print_diff_(); + printf("\n"); + } + } else { + acutest_colored_printf_(ACUTEST_COLOR_RED_INTENSIVE_, "FAILED: "); + if(!acutest_was_aborted_) { + printf("%d condition%s %s failed.\n", + acutest_test_failures_, + (acutest_test_failures_ == 1) ? "" : "s", + (acutest_test_failures_ == 1) ? "has" : "have"); + } else { + printf("Aborted.\n"); + } + } + printf("\n"); + } else if(acutest_verbose_level_ >= 1 && acutest_test_failures_ == 0) { + acutest_finish_test_line_(0); + } + + status = (acutest_test_failures_ == 0) ? 0 : -1; + +#ifdef __cplusplus + } catch(std::exception& e) { + const char* what = e.what(); + acutest_check_(0, NULL, 0, "Threw std::exception"); + if(what != NULL) + acutest_message_("std::exception::what(): %s", what); + + if(acutest_verbose_level_ >= 3) { + acutest_line_indent_(1); + acutest_colored_printf_(ACUTEST_COLOR_RED_INTENSIVE_, "FAILED: "); + printf("C++ exception.\n\n"); + } + } catch(...) { + acutest_check_(0, NULL, 0, "Threw an exception"); + + if(acutest_verbose_level_ >= 3) { + acutest_line_indent_(1); + acutest_colored_printf_(ACUTEST_COLOR_RED_INTENSIVE_, "FAILED: "); + printf("C++ exception.\n\n"); + } + } +#endif + + acutest_fini_(test->name); + acutest_case_(NULL); + acutest_current_test_ = NULL; + + return status; +} + +/* Trigger the unit test. 
If possible (and not suppressed) it starts a child + * process who calls acutest_do_run_(), otherwise it calls acutest_do_run_() + * directly. */ +static void +acutest_run_(const struct acutest_test_* test, int index, int master_index) +{ + int failed = 1; + acutest_timer_type_ start, end; + + acutest_current_test_ = test; + acutest_test_already_logged_ = 0; + acutest_timer_get_time_(&start); + + if(!acutest_no_exec_) { + +#if defined(ACUTEST_UNIX_) + + pid_t pid; + int exit_code; + + /* Make sure the child starts with empty I/O buffers. */ + fflush(stdout); + fflush(stderr); + + pid = fork(); + if(pid == (pid_t)-1) { + acutest_error_("Cannot fork. %s [%d]", strerror(errno), errno); + failed = 1; + } else if(pid == 0) { + /* Child: Do the test. */ + acutest_worker_ = 1; + failed = (acutest_do_run_(test, index) != 0); + acutest_exit_(failed ? 1 : 0); + } else { + /* Parent: Wait until child terminates and analyze its exit code. */ + waitpid(pid, &exit_code, 0); + if(WIFEXITED(exit_code)) { + switch(WEXITSTATUS(exit_code)) { + case 0: failed = 0; break; /* test has passed. */ + case 1: /* noop */ break; /* "normal" failure. 
*/ + default: acutest_error_("Unexpected exit code [%d]", WEXITSTATUS(exit_code)); + } + } else if(WIFSIGNALED(exit_code)) { + char tmp[32]; + const char* signame; + switch(WTERMSIG(exit_code)) { + case SIGINT: signame = "SIGINT"; break; + case SIGHUP: signame = "SIGHUP"; break; + case SIGQUIT: signame = "SIGQUIT"; break; + case SIGABRT: signame = "SIGABRT"; break; + case SIGKILL: signame = "SIGKILL"; break; + case SIGSEGV: signame = "SIGSEGV"; break; + case SIGILL: signame = "SIGILL"; break; + case SIGTERM: signame = "SIGTERM"; break; + default: sprintf(tmp, "signal %d", WTERMSIG(exit_code)); signame = tmp; break; + } + acutest_error_("Test interrupted by %s.", signame); + } else { + acutest_error_("Test ended in an unexpected way [%d].", exit_code); + } + } + +#elif defined(ACUTEST_WIN_) + + char buffer[512] = {0}; + STARTUPINFOA startupInfo; + PROCESS_INFORMATION processInfo; + DWORD exitCode; + + /* Windows has no fork(). So we propagate all info into the child + * through a command line arguments. */ + _snprintf(buffer, sizeof(buffer)-1, + "%s --worker=%d %s --no-exec --no-summary %s --verbose=%d --color=%s -- \"%s\"", + acutest_argv0_, index, acutest_timer_ ? "--time" : "", + acutest_tap_ ? "--tap" : "", acutest_verbose_level_, + acutest_colorize_ ? 
"always" : "never", + test->name); + memset(&startupInfo, 0, sizeof(startupInfo)); + startupInfo.cb = sizeof(STARTUPINFO); + if(CreateProcessA(NULL, buffer, NULL, NULL, FALSE, 0, NULL, NULL, &startupInfo, &processInfo)) { + WaitForSingleObject(processInfo.hProcess, INFINITE); + GetExitCodeProcess(processInfo.hProcess, &exitCode); + CloseHandle(processInfo.hThread); + CloseHandle(processInfo.hProcess); + failed = (exitCode != 0); + if(exitCode > 1) { + switch(exitCode) { + case 3: acutest_error_("Aborted."); break; + case 0xC0000005: acutest_error_("Access violation."); break; + default: acutest_error_("Test ended in an unexpected way [%lu].", exitCode); break; + } + } + } else { + acutest_error_("Cannot create unit test subprocess [%ld].", GetLastError()); + failed = 1; + } + +#else + + /* A platform where we don't know how to run child process. */ + failed = (acutest_do_run_(test, index) != 0); + +#endif + + } else { + /* Child processes suppressed through --no-exec. */ + failed = (acutest_do_run_(test, index) != 0); + } + acutest_timer_get_time_(&end); + + acutest_current_test_ = NULL; + + acutest_stat_run_units_++; + if(failed) + acutest_stat_failed_units_++; + + acutest_set_success_(master_index, !failed); + acutest_set_duration_(master_index, acutest_timer_diff_(start, end)); +} + +#if defined(ACUTEST_WIN_) +/* Callback for SEH events. 
*/ +static LONG CALLBACK +acutest_seh_exception_filter_(EXCEPTION_POINTERS *ptrs) +{ + acutest_check_(0, NULL, 0, "Unhandled SEH exception"); + acutest_message_("Exception code: 0x%08lx", ptrs->ExceptionRecord->ExceptionCode); + acutest_message_("Exception address: 0x%p", ptrs->ExceptionRecord->ExceptionAddress); + + fflush(stdout); + fflush(stderr); + + return EXCEPTION_EXECUTE_HANDLER; +} +#endif + + +#define ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ 0x0001 +#define ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_ 0x0002 + +#define ACUTEST_CMDLINE_OPTID_NONE_ 0 +#define ACUTEST_CMDLINE_OPTID_UNKNOWN_ (-0x7fffffff + 0) +#define ACUTEST_CMDLINE_OPTID_MISSINGARG_ (-0x7fffffff + 1) +#define ACUTEST_CMDLINE_OPTID_BOGUSARG_ (-0x7fffffff + 2) + +typedef struct acutest_test_CMDLINE_OPTION_ { + char shortname; + const char* longname; + int id; + unsigned flags; +} ACUTEST_CMDLINE_OPTION_; + +static int +acutest_cmdline_handle_short_opt_group_(const ACUTEST_CMDLINE_OPTION_* options, + const char* arggroup, + int (*callback)(int /*optval*/, const char* /*arg*/)) +{ + const ACUTEST_CMDLINE_OPTION_* opt; + int i; + int ret = 0; + + for(i = 0; arggroup[i] != '\0'; i++) { + for(opt = options; opt->id != 0; opt++) { + if(arggroup[i] == opt->shortname) + break; + } + + if(opt->id != 0 && !(opt->flags & ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_)) { + ret = callback(opt->id, NULL); + } else { + /* Unknown option. */ + char badoptname[3]; + badoptname[0] = '-'; + badoptname[1] = arggroup[i]; + badoptname[2] = '\0'; + ret = callback((opt->id != 0 ? 
ACUTEST_CMDLINE_OPTID_MISSINGARG_ : ACUTEST_CMDLINE_OPTID_UNKNOWN_), + badoptname); + } + + if(ret != 0) + break; + } + + return ret; +} + +#define ACUTEST_CMDLINE_AUXBUF_SIZE_ 32 + +static int +acutest_cmdline_read_(const ACUTEST_CMDLINE_OPTION_* options, int argc, char** argv, + int (*callback)(int /*optval*/, const char* /*arg*/)) +{ + + const ACUTEST_CMDLINE_OPTION_* opt; + char auxbuf[ACUTEST_CMDLINE_AUXBUF_SIZE_+1]; + int after_doubledash = 0; + int i = 1; + int ret = 0; + + auxbuf[ACUTEST_CMDLINE_AUXBUF_SIZE_] = '\0'; + + while(i < argc) { + if(after_doubledash || strcmp(argv[i], "-") == 0) { + /* Non-option argument. */ + ret = callback(ACUTEST_CMDLINE_OPTID_NONE_, argv[i]); + } else if(strcmp(argv[i], "--") == 0) { + /* End of options. All the remaining members are non-option arguments. */ + after_doubledash = 1; + } else if(argv[i][0] != '-') { + /* Non-option argument. */ + ret = callback(ACUTEST_CMDLINE_OPTID_NONE_, argv[i]); + } else { + for(opt = options; opt->id != 0; opt++) { + if(opt->longname != NULL && strncmp(argv[i], "--", 2) == 0) { + size_t len = strlen(opt->longname); + if(strncmp(argv[i]+2, opt->longname, len) == 0) { + /* Regular long option. */ + if(argv[i][2+len] == '\0') { + /* with no argument provided. */ + if(!(opt->flags & ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_)) + ret = callback(opt->id, NULL); + else + ret = callback(ACUTEST_CMDLINE_OPTID_MISSINGARG_, argv[i]); + break; + } else if(argv[i][2+len] == '=') { + /* with an argument provided. */ + if(opt->flags & (ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ | ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_)) { + ret = callback(opt->id, argv[i]+2+len+1); + } else { + sprintf(auxbuf, "--%s", opt->longname); + ret = callback(ACUTEST_CMDLINE_OPTID_BOGUSARG_, auxbuf); + } + break; + } else { + continue; + } + } + } else if(opt->shortname != '\0' && argv[i][0] == '-') { + if(argv[i][1] == opt->shortname) { + /* Regular short option. 
*/ + if(opt->flags & ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_) { + if(argv[i][2] != '\0') + ret = callback(opt->id, argv[i]+2); + else if(i+1 < argc) + ret = callback(opt->id, argv[++i]); + else + ret = callback(ACUTEST_CMDLINE_OPTID_MISSINGARG_, argv[i]); + break; + } else { + ret = callback(opt->id, NULL); + + /* There might be more (argument-less) short options + * grouped together. */ + if(ret == 0 && argv[i][2] != '\0') + ret = acutest_cmdline_handle_short_opt_group_(options, argv[i]+2, callback); + break; + } + } + } + } + + if(opt->id == 0) { /* still not handled? */ + if(argv[i][0] != '-') { + /* Non-option argument. */ + ret = callback(ACUTEST_CMDLINE_OPTID_NONE_, argv[i]); + } else { + /* Unknown option. */ + char* badoptname = argv[i]; + + if(strncmp(badoptname, "--", 2) == 0) { + /* Strip any argument from the long option. */ + char* assignment = strchr(badoptname, '='); + if(assignment != NULL) { + size_t len = assignment - badoptname; + if(len > ACUTEST_CMDLINE_AUXBUF_SIZE_) + len = ACUTEST_CMDLINE_AUXBUF_SIZE_; + strncpy(auxbuf, badoptname, len); + auxbuf[len] = '\0'; + badoptname = auxbuf; + } + } + + ret = callback(ACUTEST_CMDLINE_OPTID_UNKNOWN_, badoptname); + } + } + } + + if(ret != 0) + return ret; + i++; + } + + return ret; +} + +static void +acutest_help_(void) +{ + printf("Usage: %s [options] [test...]\n", acutest_argv0_); + printf("\n"); + printf("Run the specified unit tests; or if the option '--skip' is used, run all\n"); + printf("tests in the suite but those listed. 
By default, if no tests are specified\n"); + printf("on the command line, all unit tests in the suite are run.\n"); + printf("\n"); + printf("Options:\n"); + printf(" -s, --skip Execute all unit tests but the listed ones\n"); + printf(" --exec[=WHEN] If supported, execute unit tests as child processes\n"); + printf(" (WHEN is one of 'auto', 'always', 'never')\n"); + printf(" -E, --no-exec Same as --exec=never\n"); +#if defined ACUTEST_WIN_ + printf(" -t, --time Measure test duration\n"); +#elif defined ACUTEST_HAS_POSIX_TIMER_ + printf(" -t, --time Measure test duration (real time)\n"); + printf(" --time=TIMER Measure test duration, using given timer\n"); + printf(" (TIMER is one of 'real', 'cpu')\n"); +#endif + printf(" --no-summary Suppress printing of test results summary\n"); + printf(" --tap Produce TAP-compliant output\n"); + printf(" (See https://testanything.org/)\n"); + printf(" -x, --xml-output=FILE Enable XUnit output to the given file\n"); + printf(" -l, --list List unit tests in the suite and exit\n"); + printf(" -v, --verbose Make output more verbose\n"); + printf(" --verbose=LEVEL Set verbose level to LEVEL:\n"); + printf(" 0 ... Be silent\n"); + printf(" 1 ... Output one line per test (and summary)\n"); + printf(" 2 ... As 1 and failed conditions (this is default)\n"); + printf(" 3 ... 
As 1 and all conditions (and extended summary)\n"); + printf(" -q, --quiet Same as --verbose=0\n"); + printf(" --color[=WHEN] Enable colorized output\n"); + printf(" (WHEN is one of 'auto', 'always', 'never')\n"); + printf(" --no-color Same as --color=never\n"); + printf(" -h, --help Display this help and exit\n"); + + if(acutest_list_size_ < 16) { + printf("\n"); + acutest_list_names_(); + } +} + +static const ACUTEST_CMDLINE_OPTION_ acutest_cmdline_options_[] = { + { 's', "skip", 's', 0 }, + { 0, "exec", 'e', ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ }, + { 'E', "no-exec", 'E', 0 }, +#if defined ACUTEST_WIN_ + { 't', "time", 't', 0 }, + { 0, "timer", 't', 0 }, /* kept for compatibility */ +#elif defined ACUTEST_HAS_POSIX_TIMER_ + { 't', "time", 't', ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ }, + { 0, "timer", 't', ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ }, /* kept for compatibility */ +#endif + { 0, "no-summary", 'S', 0 }, + { 0, "tap", 'T', 0 }, + { 'l', "list", 'l', 0 }, + { 'v', "verbose", 'v', ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ }, + { 'q', "quiet", 'q', 0 }, + { 0, "color", 'c', ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ }, + { 0, "no-color", 'C', 0 }, + { 'h', "help", 'h', 0 }, + { 0, "worker", 'w', ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_ }, /* internal */ + { 'x', "xml-output", 'x', ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_ }, + { 0, NULL, 0, 0 } +}; + +static int +acutest_cmdline_callback_(int id, const char* arg) +{ + switch(id) { + case 's': + acutest_skip_mode_ = 1; + break; + + case 'e': + if(arg == NULL || strcmp(arg, "always") == 0) { + acutest_no_exec_ = 0; + } else if(strcmp(arg, "never") == 0) { + acutest_no_exec_ = 1; + } else if(strcmp(arg, "auto") == 0) { + /*noop*/ + } else { + fprintf(stderr, "%s: Unrecognized argument '%s' for option --exec.\n", acutest_argv0_, arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + } + break; + + case 'E': + acutest_no_exec_ = 1; + break; + + case 't': +#if defined ACUTEST_WIN_ || 
defined ACUTEST_HAS_POSIX_TIMER_ + if(arg == NULL || strcmp(arg, "real") == 0) { + acutest_timer_ = 1; +#ifndef ACUTEST_WIN_ + } else if(strcmp(arg, "cpu") == 0) { + acutest_timer_ = 2; +#endif + } else { + fprintf(stderr, "%s: Unrecognized argument '%s' for option --time.\n", acutest_argv0_, arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + } +#endif + break; + + case 'S': + acutest_no_summary_ = 1; + break; + + case 'T': + acutest_tap_ = 1; + break; + + case 'l': + acutest_list_names_(); + acutest_exit_(0); + break; + + case 'v': + acutest_verbose_level_ = (arg != NULL ? atoi(arg) : acutest_verbose_level_+1); + break; + + case 'q': + acutest_verbose_level_ = 0; + break; + + case 'c': + if(arg == NULL || strcmp(arg, "always") == 0) { + acutest_colorize_ = 1; + } else if(strcmp(arg, "never") == 0) { + acutest_colorize_ = 0; + } else if(strcmp(arg, "auto") == 0) { + /*noop*/ + } else { + fprintf(stderr, "%s: Unrecognized argument '%s' for option --color.\n", acutest_argv0_, arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + } + break; + + case 'C': + acutest_colorize_ = 0; + break; + + case 'h': + acutest_help_(); + acutest_exit_(0); + break; + + case 'w': + acutest_worker_ = 1; + acutest_worker_index_ = atoi(arg); + break; + case 'x': + acutest_xml_output_ = fopen(arg, "w"); + if (!acutest_xml_output_) { + fprintf(stderr, "Unable to open '%s': %s\n", arg, strerror(errno)); + acutest_exit_(2); + } + break; + + case 0: + if(acutest_lookup_(arg) == 0) { + fprintf(stderr, "%s: Unrecognized unit test '%s'\n", acutest_argv0_, arg); + fprintf(stderr, "Try '%s --list' for list of unit tests.\n", acutest_argv0_); + acutest_exit_(2); + } + break; + + case ACUTEST_CMDLINE_OPTID_UNKNOWN_: + fprintf(stderr, "Unrecognized command line option '%s'.\n", arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + break; + + 
case ACUTEST_CMDLINE_OPTID_MISSINGARG_: + fprintf(stderr, "The command line option '%s' requires an argument.\n", arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + break; + + case ACUTEST_CMDLINE_OPTID_BOGUSARG_: + fprintf(stderr, "The command line option '%s' does not expect an argument.\n", arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + break; + } + + return 0; +} + + +#ifdef ACUTEST_LINUX_ +static int +acutest_is_tracer_present_(void) +{ + /* Must be large enough so the line 'TracerPid: ${PID}' can fit in. */ + static const int OVERLAP = 32; + + char buf[512]; + int tracer_present = 0; + int fd; + size_t n_read = 0; + + fd = open("/proc/self/status", O_RDONLY); + if(fd == -1) + return 0; + + while(1) { + static const char pattern[] = "TracerPid:"; + const char* field; + + while(n_read < sizeof(buf) - 1) { + ssize_t n; + + n = read(fd, buf + n_read, sizeof(buf) - 1 - n_read); + if(n <= 0) + break; + n_read += n; + } + buf[n_read] = '\0'; + + field = strstr(buf, pattern); + if(field != NULL && field < buf + sizeof(buf) - OVERLAP) { + pid_t tracer_pid = (pid_t) atoi(field + sizeof(pattern) - 1); + tracer_present = (tracer_pid != 0); + break; + } + + if(n_read == sizeof(buf) - 1) { + /* Move the tail with the potentially incomplete line we're looking + * for to the beginning of the buffer. */ + memmove(buf, buf + sizeof(buf) - 1 - OVERLAP, OVERLAP); + n_read = OVERLAP; + } else { + break; + } + } + + close(fd); + return tracer_present; +} +#endif + +#ifdef ACUTEST_MACOS_ +static bool +acutest_AmIBeingDebugged(void) +{ + int junk; + int mib[4]; + struct kinfo_proc info; + size_t size; + + // Initialize the flags so that, if sysctl fails for some bizarre + // reason, we get a predictable result. 
+ info.kp_proc.p_flag = 0; + + // Initialize mib, which tells sysctl the info we want, in this case + // we're looking for information about a specific process ID. + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PID; + mib[3] = getpid(); + + // Call sysctl. + size = sizeof(info); + junk = sysctl(mib, sizeof(mib) / sizeof(*mib), &info, &size, NULL, 0); + assert(junk == 0); + + // We're being debugged if the P_TRACED flag is set. + return ( (info.kp_proc.p_flag & P_TRACED) != 0 ); +} +#endif + +int +main(int argc, char** argv) +{ + int i; + + acutest_argv0_ = argv[0]; + +#if defined ACUTEST_UNIX_ + acutest_colorize_ = isatty(STDOUT_FILENO); +#elif defined ACUTEST_WIN_ + #if defined _BORLANDC_ + acutest_colorize_ = isatty(_fileno(stdout)); + #else + acutest_colorize_ = _isatty(_fileno(stdout)); + #endif +#else + acutest_colorize_ = 0; +#endif + + /* Count all test units */ + acutest_list_size_ = 0; + for(i = 0; acutest_list_[i].func != NULL; i++) + acutest_list_size_++; + + acutest_test_data_ = (struct acutest_test_data_*)calloc(acutest_list_size_, sizeof(struct acutest_test_data_)); + if(acutest_test_data_ == NULL) { + fprintf(stderr, "Out of memory.\n"); + acutest_exit_(2); + } + + /* Parse options */ + acutest_cmdline_read_(acutest_cmdline_options_, argc, argv, acutest_cmdline_callback_); + + /* Initialize the proper timer. */ + acutest_timer_init_(); + +#if defined(ACUTEST_WIN_) + SetUnhandledExceptionFilter(acutest_seh_exception_filter_); +#ifdef _MSC_VER + _set_abort_behavior(0, _WRITE_ABORT_MSG); +#endif +#endif + + /* By default, we want to run all tests. */ + if(acutest_count_ == 0) { + for(i = 0; acutest_list_[i].func != NULL; i++) + acutest_remember_(i); + } + + /* Guess whether we want to run unit tests as child processes. 
*/ + if(acutest_no_exec_ < 0) { + acutest_no_exec_ = 0; + + if(acutest_count_ <= 1) { + acutest_no_exec_ = 1; + } else { +#ifdef ACUTEST_WIN_ + if(IsDebuggerPresent()) + acutest_no_exec_ = 1; +#endif +#ifdef ACUTEST_LINUX_ + if(acutest_is_tracer_present_()) + acutest_no_exec_ = 1; +#endif +#ifdef ACUTEST_MACOS_ + if(acutest_AmIBeingDebugged()) + acutest_no_exec_ = 1; +#endif +#ifdef RUNNING_ON_VALGRIND + /* RUNNING_ON_VALGRIND is provided by optionally included <valgrind.h> */ + if(RUNNING_ON_VALGRIND) + acutest_no_exec_ = 1; +#endif + } + } + + if(acutest_tap_) { + /* TAP requires we know test result ("ok", "not ok") before we output + * anything about the test, and this gets problematic for larger verbose + * levels. */ + if(acutest_verbose_level_ > 2) + acutest_verbose_level_ = 2; + + /* TAP harness should provide some summary. */ + acutest_no_summary_ = 1; + + if(!acutest_worker_) + printf("1..%d\n", (int) acutest_count_); + } + + int index = acutest_worker_index_; + for(i = 0; acutest_list_[i].func != NULL; i++) { + int run = (acutest_test_data_[i].flags & ACUTEST_FLAG_RUN_); + if (acutest_skip_mode_) /* Run all tests except those listed. 
*/ + run = !run; + if(run) + acutest_run_(&acutest_list_[i], index++, i); + } + + /* Write a summary */ + if(!acutest_no_summary_ && acutest_verbose_level_ >= 1) { + if(acutest_verbose_level_ >= 3) { + acutest_colored_printf_(ACUTEST_COLOR_DEFAULT_INTENSIVE_, "Summary:\n"); + + printf(" Count of all unit tests: %4d\n", (int) acutest_list_size_); + printf(" Count of run unit tests: %4d\n", acutest_stat_run_units_); + printf(" Count of failed unit tests: %4d\n", acutest_stat_failed_units_); + printf(" Count of skipped unit tests: %4d\n", (int) acutest_list_size_ - acutest_stat_run_units_); + } + + if(acutest_stat_failed_units_ == 0) { + acutest_colored_printf_(ACUTEST_COLOR_GREEN_INTENSIVE_, "SUCCESS:"); + printf(" All unit tests have passed.\n"); + } else { + acutest_colored_printf_(ACUTEST_COLOR_RED_INTENSIVE_, "FAILED:"); + printf(" %d of %d unit tests %s failed.\n", + acutest_stat_failed_units_, acutest_stat_run_units_, + (acutest_stat_failed_units_ == 1) ? "has" : "have"); + } + + if(acutest_verbose_level_ >= 3) + printf("\n"); + } + + if (acutest_xml_output_) { +#if defined ACUTEST_UNIX_ + char *suite_name = basename(argv[0]); +#elif defined ACUTEST_WIN_ + char suite_name[_MAX_FNAME]; + _splitpath(argv[0], NULL, NULL, suite_name, NULL); +#else + const char *suite_name = argv[0]; +#endif + fprintf(acutest_xml_output_, "\n"); + fprintf(acutest_xml_output_, "\n", + suite_name, (int)acutest_list_size_, acutest_stat_failed_units_, acutest_stat_failed_units_, + (int)acutest_list_size_ - acutest_stat_run_units_); + for(i = 0; acutest_list_[i].func != NULL; i++) { + struct acutest_test_data_ *details = &acutest_test_data_[i]; + fprintf(acutest_xml_output_, " \n", acutest_list_[i].name, details->duration); + if (details->flags & ACUTEST_FLAG_FAILURE_) + fprintf(acutest_xml_output_, " \n"); + if (!(details->flags & ACUTEST_FLAG_FAILURE_) && !(details->flags & ACUTEST_FLAG_SUCCESS_)) + fprintf(acutest_xml_output_, " \n"); + fprintf(acutest_xml_output_, " \n"); + } + 
fprintf(acutest_xml_output_, "\n"); + fclose(acutest_xml_output_); + } + + acutest_cleanup_(); + + return (acutest_stat_failed_units_ == 0) ? 0 : 1; +} + + +#endif /* #ifndef TEST_NO_MAIN */ + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* #ifndef ACUTEST_H */ From 96fc645034815fffbfac36e85f6a69546ee1d2bb Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Fri, 13 Oct 2023 11:56:50 +0200 Subject: [PATCH 03/70] feat(lsm): implement bt remove --- lsm/include/lsm.h | 12 +++--- lsm/src/bt/lsm_bt.c | 89 ++++++++++++++++++++++++++++++++++------ lsm/src/bt/lsm_bt.h | 6 ++- lsm/src/lsm_store.c | 3 +- lsm/src/lsm_store_node.c | 5 ++- lsm/src/lsm_store_node.h | 3 +- lsm/test/bt/bt.c | 43 +++++++++++++++++++ 7 files changed, 135 insertions(+), 26 deletions(-) diff --git a/lsm/include/lsm.h b/lsm/include/lsm.h index b216d71..2430091 100644 --- a/lsm/include/lsm.h +++ b/lsm/include/lsm.h @@ -6,10 +6,10 @@ #define LSM_MAX_SKIP_SIZE 8 typedef enum lsm_error { - lsm_error_ok = 0, - lsm_error_failed_alloc = 1, - lsm_error_not_found = 2, - lsm_error_already_present = 3 + lsm_error_ok = 0, + lsm_error_failed_alloc = 1, + lsm_error_not_found = 2, + lsm_error_already_present = 3 } lsm_error; /** @@ -29,9 +29,7 @@ typedef struct lsm_string { * The type of an attribute. Each type is represented as a single bit of a * 32-bit integer, so they can be easily combined into a bitmap. 
*/ -typedef enum lsm_attr_type { - lsm_attr_type_entry_type = 1 << 0 -} lsm_attr_type; +typedef enum lsm_attr_type { lsm_attr_type_entry_type = 1 << 0 } lsm_attr_type; /** * A single attribute associated with an entry diff --git a/lsm/src/bt/lsm_bt.c b/lsm/src/bt/lsm_bt.c index 6b7b7bf..aadaaaf 100644 --- a/lsm/src/bt/lsm_bt.c +++ b/lsm/src/bt/lsm_bt.c @@ -17,6 +17,20 @@ lsm_error lsm_bt_node_init(lsm_bt_node **ptr, const char key, void *data) { return lsm_error_ok; } +void lsm_bt_node_free(lsm_bt_node *node) { free(node); } + +void lsm_bt_node_free_recursive(lsm_bt_node *node) { + if (node->left != NULL) { + lsm_bt_node_free_recursive(node->left); + } + + if (node->right != NULL) { + lsm_bt_node_free_recursive(node->right); + } + + lsm_bt_node_free(node); +} + lsm_error lsm_bt_init(lsm_bt **ptr) { lsm_bt *bt = calloc(1, sizeof(lsm_bt)); @@ -29,18 +43,26 @@ lsm_error lsm_bt_init(lsm_bt **ptr) { return lsm_error_ok; } +void lsm_bt_free(lsm_bt *bt) { + if (bt->root != NULL) { + lsm_bt_node_free_recursive(bt->root); + } + + free(bt); +} + lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data) { lsm_bt_node **dest = &bt->root; // Traverse down the tree until we reach the new point to insert our node - while (*dest != NULL) { - if ((*dest)->key == key) { - return lsm_error_already_present; - } - + while ((*dest != NULL) && ((*dest)->key != key)) { dest = key < (*dest)->key ? &(*dest)->left : &(*dest)->right; } + if (*dest != NULL) { + return lsm_error_already_present; + } + lsm_bt_node *node; if (lsm_bt_node_init(&node, key, data) != lsm_error_ok) { @@ -56,15 +78,56 @@ lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data) { lsm_error lsm_bt_search(void **out, lsm_bt *bt, char key) { lsm_bt_node *node = bt->root; - while (node != NULL) { - if (node->key == key) { - *out = node->data; - - return lsm_error_ok; - } - + while ((node != NULL) && (node->key != key)) { node = key < node->key ? 
node->left : node->right; } - return lsm_error_not_found; + if (node == NULL) { + return lsm_error_not_found; + } + + *out = node->data; + + return lsm_error_ok; +} + +lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key) { + if (bt->root == NULL) { + return lsm_error_not_found; + } + + lsm_bt_node **dest = &bt->root; + + while ((*dest != NULL) && ((*dest)->key != key)) { + dest = key < (*dest)->key ? &(*dest)->left : &(*dest)->right; + } + + if (*dest == NULL) { + return lsm_error_not_found; + } + + *out = (*dest)->data; + bt->size--; + + if (((*dest)->left != NULL) && ((*dest)->right != NULL)) { + lsm_bt_node **succ = &(*dest)->right; + + while ((*succ)->left != NULL) { + succ = &(*succ)->left; + } + + (*dest)->key = (*succ)->key; + (*dest)->data = (*succ)->data; + + lsm_bt_node *succ_replacement = (*succ)->right; + lsm_bt_node_free(*succ); + *succ = succ_replacement; + } else { + lsm_bt_node *replacement = + (*dest)->left != NULL ? (*dest)->left : (*dest)->right; + lsm_bt_node_free(*dest); + *dest = replacement; + } + + return lsm_error_ok; } diff --git a/lsm/src/bt/lsm_bt.h b/lsm/src/bt/lsm_bt.h index a886f68..60219d1 100644 --- a/lsm/src/bt/lsm_bt.h +++ b/lsm/src/bt/lsm_bt.h @@ -68,11 +68,13 @@ lsm_error lsm_bt_search(void **out, lsm_bt *bt, char key); lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data); /** - * Remove the given key from the binary tree. + * Remove the given key from the binary tree. Ownership of the data pointer is + * returned to the caller. * + * @param out address to write data pointer to * @param bt binary tree to remove from * @param key key to remove */ -lsm_error lsm_bt_remove(lsm_bt *bt, char key); +lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key); #endif diff --git a/lsm/src/lsm_store.c b/lsm/src/lsm_store.c index e167836..f3503bc 100644 --- a/lsm/src/lsm_store.c +++ b/lsm/src/lsm_store.c @@ -6,7 +6,8 @@ /** * Initialize a new lsm_store struct. 
* - * @param lsm_store pointer to where to store the newly allocated object's pointer + * @param lsm_store pointer to where to store the newly allocated object's + * pointer * @return success of the function */ lsm_error lsm_store_init(lsm_store **ptr) { diff --git a/lsm/src/lsm_store_node.c b/lsm/src/lsm_store_node.c index 63bc238..b50f15e 100644 --- a/lsm/src/lsm_store_node.c +++ b/lsm/src/lsm_store_node.c @@ -1,7 +1,7 @@ #include -#include "lsm_store_node.h" #include "lsm.h" +#include "lsm_store_node.h" lsm_error lsm_store_inode_init(lsm_store_inode **ptr, const char c) { lsm_store_inode *node = calloc(1, sizeof(lsm_store_inode)); @@ -28,7 +28,8 @@ lsm_error lsm_store_node_init(lsm_store_node **ptr) { return lsm_error_ok; } -lsm_error lsm_store_node_search(lsm_store_node **out_ptr, lsm_store_node *node, const char c) { +lsm_error lsm_store_node_search(lsm_store_node **out_ptr, lsm_store_node *node, + const char c) { if (node->size == 0) { return lsm_error_not_found; } diff --git a/lsm/src/lsm_store_node.h b/lsm/src/lsm_store_node.h index 7fbcff3..548eb53 100644 --- a/lsm/src/lsm_store_node.h +++ b/lsm/src/lsm_store_node.h @@ -37,6 +37,7 @@ lsm_error lsm_store_node_init(lsm_store_node **out); /** * Search for the next node following the given character, if present. 
*/ -lsm_error lsm_store_node_search(lsm_store_node **out, lsm_store_node *node, const char c); +lsm_error lsm_store_node_search(lsm_store_node **out, lsm_store_node *node, + const char c); #endif diff --git a/lsm/test/bt/bt.c b/lsm/test/bt/bt.c index 152a6bc..21b4ed2 100644 --- a/lsm/test/bt/bt.c +++ b/lsm/test/bt/bt.c @@ -9,6 +9,7 @@ void test_init() { BT_INIT(); + lsm_bt_free(bt); } void test_insert_first() { @@ -22,6 +23,8 @@ void test_insert_first() { TEST_CHECK(data == (void *)1); TEST_CHECK(lsm_bt_search(&data, bt, 'b') == lsm_error_not_found); + + lsm_bt_free(bt); } void test_insert_two() { @@ -38,6 +41,8 @@ void test_insert_two() { TEST_CHECK(lsm_bt_search(&data, bt, 'b') == lsm_error_ok); TEST_CHECK(data == (void *)2); TEST_CHECK(lsm_bt_search(&data, bt, 'c') == lsm_error_not_found); + + lsm_bt_free(bt); } void test_insert_multiple() { @@ -56,6 +61,42 @@ void test_insert_multiple() { TEST_CHECK(lsm_bt_search(&data, bt, chars[i]) == lsm_error_ok); TEST_CHECK(data == (void *)(i + 1)); } + + lsm_bt_free(bt); +} + +void test_remove_root() { + BT_INIT(); + + TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_ok); + + void *data; + TEST_CHECK(lsm_bt_remove(&data, bt, 'a') == lsm_error_ok); + TEST_CHECK(data == (void *)1); + TEST_CHECK(bt->root == NULL); + + lsm_bt_free(bt); +} + +void test_remove_multiple() { + char chars[] = "falcoep"; + size_t char_count = sizeof(chars) / sizeof(char); + + BT_INIT(); + + for (size_t i = 0; i < char_count; i++) { + TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_ok); + } + + void *data; + TEST_CHECK(lsm_bt_remove(&data, bt, 'l') == lsm_error_ok); + TEST_CHECK(data == (void *)3); + TEST_CHECK(lsm_bt_remove(&data, bt, 'l') == lsm_error_not_found); + TEST_CHECK(lsm_bt_remove(&data, bt, 'e') == lsm_error_ok); + TEST_CHECK(data == (void *)6); + TEST_CHECK(lsm_bt_remove(&data, bt, 'e') == lsm_error_not_found); + + lsm_bt_free(bt); } TEST_LIST = { @@ -63,5 +104,7 @@ TEST_LIST = { { "test insert first", 
test_insert_first }, { "test insert two", test_insert_two }, { "test insert multiple", test_insert_multiple }, + { "test remove root", test_remove_root }, + { "test remove multiple", test_remove_multiple }, { NULL, NULL } }; From c327be80e994c6c3024399ec89eecb52704205c4 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Fri, 13 Oct 2023 12:45:48 +0200 Subject: [PATCH 04/70] feat(lsm): started string implementation --- lsm/include/lsm.h | 85 +++++++++++------------ lsm/{src/bt/lsm_bt.h => include/lsm/bt.h} | 35 +--------- lsm/include/lsm/str.h | 60 ++++++++++++++++ lsm/include/lsm/trie.h | 25 +++++++ lsm/src/bt/lsm_bt.c | 3 +- lsm/src/bt/lsm_bt_internal.h | 38 ++++++++++ lsm/src/lsm_store.c | 16 ++--- lsm/src/lsm_store_node.h | 4 +- lsm/src/string/lsm_str.c | 61 ++++++++++++++++ lsm/src/string/lsm_str_internal.h | 16 +++++ lsm/test/bt/bt.c | 3 +- 11 files changed, 256 insertions(+), 90 deletions(-) rename lsm/{src/bt/lsm_bt.h => include/lsm/bt.h} (63%) create mode 100644 lsm/include/lsm/str.h create mode 100644 lsm/include/lsm/trie.h create mode 100644 lsm/src/bt/lsm_bt_internal.h create mode 100644 lsm/src/string/lsm_str.c create mode 100644 lsm/src/string/lsm_str_internal.h diff --git a/lsm/include/lsm.h b/lsm/include/lsm.h index 2430091..aa76826 100644 --- a/lsm/include/lsm.h +++ b/lsm/include/lsm.h @@ -12,55 +12,52 @@ typedef enum lsm_error { lsm_error_already_present = 3 } lsm_error; -/** - * Represents a string (or really any kind of data) with a known length. Data - * with length 8 or less is stored inside the pointer, and does not allocate - * additional memory. - */ -typedef struct lsm_string { - uint64_t len; - union { - void *ptr; - char val[8]; - } str; -} lsm_string; +/*typedef struct lsm_string { */ +/* uint64_t len; */ +/* union { */ +/* void *ptr; */ +/* char val[8]; */ +/* } str; */ +/*} lsm_string; */ -/** - * The type of an attribute. Each type is represented as a single bit of a - * 32-bit integer, so they can be easily combined into a bitmap. 
- */ -typedef enum lsm_attr_type { lsm_attr_type_entry_type = 1 << 0 } lsm_attr_type; +/*/1** */ +/* * The type of an attribute. Each type is represented as a single bit of a */ +/* * 32-bit integer, so they can be easily combined into a bitmap. */ +/* *1/ */ +/*typedef enum lsm_attr_type { lsm_attr_type_entry_type = 1 << 0 } + * lsm_attr_type; */ -/** - * A single attribute associated with an entry - */ -typedef struct lsm_attr { - lsm_attr_type type; - lsm_string str; -} lsm_attr; +/*/1** */ +/* * A single attribute associated with an entry */ +/* *1/ */ +/*typedef struct lsm_attr { */ +/* lsm_attr_type type; */ +/* lsm_string str; */ +/*} lsm_attr; */ -/** - * Represents a collection of attributes for an entry. A collection can only - * contain one of each attribute. +/*/1** */ +/* * Represents a collection of attributes for an entry. A collection can only */ -typedef struct lsm_attr_list { - uint64_t count; - lsm_attr *items; - uint32_t bitmap; -} lsm_attr_list; +/* * contain one of each attribute. */ +/* *1/ */ +/*typedef struct lsm_attr_list { */ +/* uint64_t count; */ +/* lsm_attr *items; */ +/* uint32_t bitmap; */ +/*} lsm_attr_list; */ -/** - * An entry inside an LSM store - */ -typedef struct lsm_entry { - lsm_string key; - lsm_attr_list attrs; - lsm_string data; -} lsm_entry; +/*/1** */ +/* * An entry inside an LSM store */ +/* *1/ */ +/*typedef struct lsm_entry { */ +/* lsm_string key; */ +/* lsm_attr_list attrs; */ +/* lsm_string data; */ +/*} lsm_entry; */ -/** - * A store of entries, which manages its data both in-memory and on disk. - */ -typedef struct lsm_store lsm_store; +/*/1** */ +/* * A store of entries, which manages its data both in-memory and on disk. 
*/ +/* *1/ */ +/*typedef struct lsm_store lsm_store; */ #endif diff --git a/lsm/src/bt/lsm_bt.h b/lsm/include/lsm/bt.h similarity index 63% rename from lsm/src/bt/lsm_bt.h rename to lsm/include/lsm/bt.h index 60219d1..a2826b0 100644 --- a/lsm/src/bt/lsm_bt.h +++ b/lsm/include/lsm/bt.h @@ -1,41 +1,12 @@ -#ifndef LSM_BT_INTERNAL -#define LSM_BT_INTERNAL - -#include +#ifndef LSM_BT +#define LSM_BT #include "lsm.h" -/** - * Node inside a binary tree - */ -typedef struct lsm_bt_node { - struct lsm_bt_node *left; - struct lsm_bt_node *right; - void *data; - char key; -} lsm_bt_node; - -/** - * Initialize a new binary tree node - * - * @param ptr where to store newly allocated pointer - * @param key key for the node - * @param data data to store - */ -lsm_error lsm_bt_node_init(lsm_bt_node **ptr, const char key, void *data); - -/** - * Deallocate a single binary tree node - */ -void lsm_bt_node_free(lsm_bt_node *node); - /** * A binary tree implementation using char values as keys */ -typedef struct lsm_bt { - lsm_bt_node *root; - uint8_t size; -} lsm_bt; +typedef struct lsm_bt lsm_bt; /** * Initialize a new binary tree diff --git a/lsm/include/lsm/str.h b/lsm/include/lsm/str.h new file mode 100644 index 0000000..58930ec --- /dev/null +++ b/lsm/include/lsm/str.h @@ -0,0 +1,60 @@ +#ifndef LSM_STR +#define LSM_STR + +#include "lsm.h" + +/** + * Represents a string (or really any kind of data) with a known length. Data + * with length 8 or less is stored inside the pointer, and does not allocate + * additional memory. + */ +typedef struct lsm_str lsm_str; + +/** + * Allocate a new string struct of length 0. + * + * @param ptr pointer to store newly allocated pointer in + */ +lsm_error lsm_str_init_zero(lsm_str **ptr); + +/** + * Update an existing lsm_str so it now represents the new provided string. The + * string pointer of the original object is free'd if needed. 
+ * + * @param str lsm_str object to modify + * @param s string to convert into lsm string; ownership is taken over + */ +void lsm_str_init_prealloc(lsm_str *str, char *s); + +/** + * Allocate and initialize a new lsm_str object + * + * @param ptr pointer to store newly allocated pointer + * @param s string to convert into lsm string; ownership is taken over + */ +lsm_error lsm_str_init(lsm_str **ptr, char *s); + +/** + * Deallocate the existing internal string if needed and replace the lsm_str + * with a string of length 0, wiping its contents. + * + * @param str string to wipe + */ +void lsm_str_zero(lsm_str *str); + +/** + * Deallocate the string and its internal char buffer if needed. Only call this + * on heap-allocated strings. + * + * @param str string to deallocate + */ +void lsm_str_free(lsm_str *str); + +/** + * Return the length of the string. + * + * @param str string to return length for. + */ +uint64_t lsm_str_len(lsm_str *str); + +#endif diff --git a/lsm/include/lsm/trie.h b/lsm/include/lsm/trie.h new file mode 100644 index 0000000..c50d1b3 --- /dev/null +++ b/lsm/include/lsm/trie.h @@ -0,0 +1,25 @@ +#ifndef LSM_TRIE +#define LSM_TRIE + +#include "lsm.h" + +/** + * A struct representing a trie + */ +typedef struct lsm_trie lsm_trie; + +/** + * Initialize a new trie. 
+ * + * @param ptr where to store the newly allocated pointer + */ +lsm_error lsm_trie_init(lsm_trie **ptr); + +/** + * Deallocate an entire trie, including all its nodes + * + * @param trie trie to free + */ +void lsm_trie_free(lsm_trie *trie); + +#endif diff --git a/lsm/src/bt/lsm_bt.c b/lsm/src/bt/lsm_bt.c index aadaaaf..d6e8699 100644 --- a/lsm/src/bt/lsm_bt.c +++ b/lsm/src/bt/lsm_bt.c @@ -1,7 +1,6 @@ #include -#include "lsm.h" -#include "lsm_bt.h" +#include "lsm_bt_internal.h" lsm_error lsm_bt_node_init(lsm_bt_node **ptr, const char key, void *data) { lsm_bt_node *node = calloc(1, sizeof(lsm_bt_node)); diff --git a/lsm/src/bt/lsm_bt_internal.h b/lsm/src/bt/lsm_bt_internal.h new file mode 100644 index 0000000..4b55771 --- /dev/null +++ b/lsm/src/bt/lsm_bt_internal.h @@ -0,0 +1,38 @@ +#ifndef LSM_BT_INTERNAL +#define LSM_BT_INTERNAL + +#include + +#include "lsm.h" +#include "lsm/bt.h" + +/** + * Node inside a binary tree + */ +typedef struct lsm_bt_node { + struct lsm_bt_node *left; + struct lsm_bt_node *right; + void *data; + char key; +} lsm_bt_node; + +/** + * Initialize a new binary tree node + * + * @param ptr where to store newly allocated pointer + * @param key key for the node + * @param data data to store + */ +lsm_error lsm_bt_node_init(lsm_bt_node **ptr, const char key, void *data); + +/** + * Deallocate a single binary tree node + */ +void lsm_bt_node_free(lsm_bt_node *node); + +struct lsm_bt { + lsm_bt_node *root; + uint8_t size; +}; + +#endif diff --git a/lsm/src/lsm_store.c b/lsm/src/lsm_store.c index f3503bc..1a5a445 100644 --- a/lsm/src/lsm_store.c +++ b/lsm/src/lsm_store.c @@ -10,14 +10,14 @@ * pointer * @return success of the function */ -lsm_error lsm_store_init(lsm_store **ptr) { - lsm_store *store = calloc(1, sizeof(lsm_store)); +/* lsm_error lsm_store_init(lsm_store **ptr) { */ +/* lsm_store *store = calloc(1, sizeof(lsm_store)); */ - if (store == NULL) { - return lsm_error_failed_alloc; - } +/* if (store == NULL) { */ +/* return 
lsm_error_failed_alloc; */ +/* } */ - *ptr = store; +/* *ptr = store; */ - return lsm_error_ok; -} +/* return lsm_error_ok; */ +/* } */ diff --git a/lsm/src/lsm_store_node.h b/lsm/src/lsm_store_node.h index 548eb53..826b312 100644 --- a/lsm/src/lsm_store_node.h +++ b/lsm/src/lsm_store_node.h @@ -23,10 +23,10 @@ lsm_error lsm_store_inode_init(lsm_store_inode **ptr, const char c); * binary tree. */ typedef struct lsm_store_node { - lsm_entry *entry; + /* lsm_entry *entry; */ lsm_store_inode *root; uint8_t size; - lsm_string skip; + /* lsm_string skip; */ } lsm_store_node; /** diff --git a/lsm/src/string/lsm_str.c b/lsm/src/string/lsm_str.c new file mode 100644 index 0000000..81e8797 --- /dev/null +++ b/lsm/src/string/lsm_str.c @@ -0,0 +1,61 @@ +#include +#include +#include + +#include "lsm.h" +#include "lsm_str_internal.h" + +lsm_error lsm_str_init_zero(lsm_str **ptr) { + lsm_str *str = calloc(1, sizeof(lsm_str)); + + if (str == NULL) { + return lsm_error_failed_alloc; + } + + *ptr = str; + + return lsm_error_ok; +} + +void lsm_str_init_prealloc(lsm_str *str, char *s) { + str->len = strlen(s); + + if (str->len <= 8) { + memcpy(str->data.val, s, str->len); + free(s); + } else { + str->data.ptr = s; + } +} + +lsm_error lsm_str_init(lsm_str **ptr, char *s) { + lsm_str *str = calloc(1, sizeof(lsm_str)); + + if (str == NULL) { + return lsm_error_failed_alloc; + } + + lsm_str_init_prealloc(str, s); + + *ptr = str; + + return lsm_error_ok; +} + +void lsm_str_zero(lsm_str *str) { + if (str->len > 8) { + free(str->data.ptr); + } + + str->len = 0; +} + +void lsm_str_free(lsm_str *str) { + if (str->len > 8) { + free(str->data.ptr); + } + + free(str); +} + +uint64_t lsm_str_len(lsm_str *str) { return str->len; } diff --git a/lsm/src/string/lsm_str_internal.h b/lsm/src/string/lsm_str_internal.h new file mode 100644 index 0000000..909a0df --- /dev/null +++ b/lsm/src/string/lsm_str_internal.h @@ -0,0 +1,16 @@ +#ifndef LSM_STR_INTERNAL +#define LSM_STR_INTERNAL + +#include + 
+#include "lsm/str.h" + +struct lsm_str { + uint64_t len; + union { + void *ptr; + char val[8]; + } data; +}; + +#endif diff --git a/lsm/test/bt/bt.c b/lsm/test/bt/bt.c index 21b4ed2..f2d2781 100644 --- a/lsm/test/bt/bt.c +++ b/lsm/test/bt/bt.c @@ -1,6 +1,5 @@ #include "test.h" -#include "lsm.h" -#include "lsm_bt.h" +#include "lsm_bt_internal.h" #define BT_INIT() \ lsm_bt *bt; \ From 0548efda97fe73a04f4e6f66b923ddf8cb200b93 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Fri, 13 Oct 2023 13:07:40 +0200 Subject: [PATCH 05/70] refactor(lsm): allow modules to import other internal header files --- lsm/config.mk | 2 +- lsm/include/lsm/trie.h | 35 +++++++++++++++++++ .../lsm/bt_internal.h} | 0 .../lsm/str_internal.h} | 0 lsm/src/_include/lsm/trie_internal.h | 14 ++++++++ lsm/src/bt/lsm_bt.c | 2 +- lsm/src/{string => str}/lsm_str.c | 2 +- lsm/src/trie/lsm_trie.c | 1 + lsm/test/bt/bt.c | 2 +- 9 files changed, 54 insertions(+), 4 deletions(-) rename lsm/src/{bt/lsm_bt_internal.h => _include/lsm/bt_internal.h} (100%) rename lsm/src/{string/lsm_str_internal.h => _include/lsm/str_internal.h} (100%) create mode 100644 lsm/src/_include/lsm/trie_internal.h rename lsm/src/{string => str}/lsm_str.c (96%) create mode 100644 lsm/src/trie/lsm_trie.c diff --git a/lsm/config.mk b/lsm/config.mk index c453c6a..310b7c4 100644 --- a/lsm/config.mk +++ b/lsm/config.mk @@ -3,7 +3,7 @@ LIB_FILENAME = liblsm.a BUILD_DIR = build SRC_DIR = src TEST_DIR = test -INC_DIRS = include +INC_DIRS = include src/_include # -MMD: generate a .d file for every source file. 
This file can be imported by # make and makes make aware that a header file has been changed, ensuring an diff --git a/lsm/include/lsm/trie.h b/lsm/include/lsm/trie.h index c50d1b3..7fd6b5b 100644 --- a/lsm/include/lsm/trie.h +++ b/lsm/include/lsm/trie.h @@ -2,6 +2,7 @@ #define LSM_TRIE #include "lsm.h" +#include "lsm/str.h" /** * A struct representing a trie @@ -22,4 +23,38 @@ lsm_error lsm_trie_init(lsm_trie **ptr); */ void lsm_trie_free(lsm_trie *trie); +/** + * Insert a new element into the trie using the specified key. + * + * @param trie trie to insert into + * @param key key to insert data with + * @param data data to insert + */ +lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data); + +/** + * Search for an element in the trie. + * + * @param out where to store data pointer, if present + * @param trie trie to search in + * @param key key to search with + */ +lsm_error lsm_trie_search(void **data, lsm_trie *trie, lsm_str *key); + +/** + * Remove an element from the trie. + * + * @param out where to store the removed data pointer, if present. 
+ * @param trie trie to remove from + * @param key key to remove + */ +lsm_error lsm_trie_remove(void **data, lsm_trie *trie, lsm_str *key); + +/** + * Return the size of a trie + * + * @param trie trie to return size for + */ +uint64_t lsm_trie_size(lsm_trie *trie); + #endif diff --git a/lsm/src/bt/lsm_bt_internal.h b/lsm/src/_include/lsm/bt_internal.h similarity index 100% rename from lsm/src/bt/lsm_bt_internal.h rename to lsm/src/_include/lsm/bt_internal.h diff --git a/lsm/src/string/lsm_str_internal.h b/lsm/src/_include/lsm/str_internal.h similarity index 100% rename from lsm/src/string/lsm_str_internal.h rename to lsm/src/_include/lsm/str_internal.h diff --git a/lsm/src/_include/lsm/trie_internal.h b/lsm/src/_include/lsm/trie_internal.h new file mode 100644 index 0000000..4fb7037 --- /dev/null +++ b/lsm/src/_include/lsm/trie_internal.h @@ -0,0 +1,14 @@ +#ifndef LSM_TRIE_INTERNAL +#define LSM_TRIE_INTERNAL + +#include "lsm/bt_internal.h" +#include "lsm/str_internal.h" +#include "lsm/trie.h" + +typedef struct lsm_trie_node { + lsm_bt bt; + lsm_str skip; + char c; +} lsm_trie_node; + +#endif diff --git a/lsm/src/bt/lsm_bt.c b/lsm/src/bt/lsm_bt.c index d6e8699..da08cbd 100644 --- a/lsm/src/bt/lsm_bt.c +++ b/lsm/src/bt/lsm_bt.c @@ -1,6 +1,6 @@ #include -#include "lsm_bt_internal.h" +#include "lsm/bt_internal.h" lsm_error lsm_bt_node_init(lsm_bt_node **ptr, const char key, void *data) { lsm_bt_node *node = calloc(1, sizeof(lsm_bt_node)); diff --git a/lsm/src/string/lsm_str.c b/lsm/src/str/lsm_str.c similarity index 96% rename from lsm/src/string/lsm_str.c rename to lsm/src/str/lsm_str.c index 81e8797..38bce13 100644 --- a/lsm/src/string/lsm_str.c +++ b/lsm/src/str/lsm_str.c @@ -3,7 +3,7 @@ #include #include "lsm.h" -#include "lsm_str_internal.h" +#include "lsm/str_internal.h" lsm_error lsm_str_init_zero(lsm_str **ptr) { lsm_str *str = calloc(1, sizeof(lsm_str)); diff --git a/lsm/src/trie/lsm_trie.c b/lsm/src/trie/lsm_trie.c new file mode 100644 index 
0000000..568decb --- /dev/null +++ b/lsm/src/trie/lsm_trie.c @@ -0,0 +1 @@ +#include "lsm/trie_internal.h" diff --git a/lsm/test/bt/bt.c b/lsm/test/bt/bt.c index f2d2781..1900305 100644 --- a/lsm/test/bt/bt.c +++ b/lsm/test/bt/bt.c @@ -1,5 +1,5 @@ #include "test.h" -#include "lsm_bt_internal.h" +#include "lsm/bt_internal.h" #define BT_INIT() \ lsm_bt *bt; \ From 622d644f2510c207a4fb0516bdd998961ac1ed3b Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Fri, 13 Oct 2023 21:10:31 +0200 Subject: [PATCH 06/70] feat(lsm): possibly implemented trie insert --- lsm/include/lsm.h | 3 +- lsm/include/lsm/bt.h | 10 +++ lsm/include/lsm/str.h | 61 +++++++++++++ lsm/src/_include/lsm/str_internal.h | 2 +- lsm/src/_include/lsm/trie_internal.h | 24 ++++- lsm/src/bt/lsm_bt.c | 17 ++++ lsm/src/str/lsm_str.c | 99 +++++++++++++++++++++ lsm/src/trie/lsm_trie.c | 127 +++++++++++++++++++++++++++ 8 files changed, 340 insertions(+), 3 deletions(-) diff --git a/lsm/include/lsm.h b/lsm/include/lsm.h index aa76826..8ecb958 100644 --- a/lsm/include/lsm.h +++ b/lsm/include/lsm.h @@ -9,7 +9,8 @@ typedef enum lsm_error { lsm_error_ok = 0, lsm_error_failed_alloc = 1, lsm_error_not_found = 2, - lsm_error_already_present = 3 + lsm_error_already_present = 3, + lsm_error_null_value = 4 } lsm_error; /*typedef struct lsm_string { */ diff --git a/lsm/include/lsm/bt.h b/lsm/include/lsm/bt.h index a2826b0..a0995a1 100644 --- a/lsm/include/lsm/bt.h +++ b/lsm/include/lsm/bt.h @@ -48,4 +48,14 @@ lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data); */ lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key); +/** + * Replace the data at an existing key with new data, returning the old. 
+ * + * @param out address to write old data pointer to + * @param bt binary tree to replace in + * @param key key to replace at + * @param data new data to store + */ +lsm_error lsm_bt_replace(void **out, lsm_bt *bt, char key, void *data); + #endif diff --git a/lsm/include/lsm/str.h b/lsm/include/lsm/str.h index 58930ec..b790a32 100644 --- a/lsm/include/lsm/str.h +++ b/lsm/include/lsm/str.h @@ -57,4 +57,65 @@ void lsm_str_free(lsm_str *str); */ uint64_t lsm_str_len(lsm_str *str); +/** + * Return a pointer to the string's underlying char array. Note that this array + * will *not* necessarily be null-terminated. + * + * @param str string to return pointer for + */ +const char *lsm_str_ptr(lsm_str *str); + +/** + * Returns the character at the specified position. + * + * @param index index of character to return + */ +char lsm_str_char(lsm_str *str, uint64_t index); + +/** + * Take a substring and copy it to a provided string object. + * + * @param out string to store new substring in. The contents of this string will + * be replaced. + * @param str string to take substring from + * @param start inclusive start index for the substring. If this is greater than + * or equal to the string's length, out will be a zero-length string. + * @param end exclusive end index for the substring + */ +lsm_error lsm_str_substr(lsm_str *out, lsm_str *str, uint64_t start, + uint64_t end); + +/** + * Return the first index where s1 and s2 differ, starting at their respective + * offsets. If both strings are equal (or one is a prefix of the other), the + * result will be the length of the shortest string. The returned value is + * relative to the given offsets. + * + * @param s1 string to compare + * @param s1_offset offset inside s1 to start comparing from + * @param s2 string to compare s1 to + * @param s2_offset offset inside s2 to start comparing from + */ +uint64_t lsm_str_cmp(lsm_str *s1, uint64_t s1_offset, lsm_str *s2, + uint64_t s2_offset); + +/** + * Truncate a string in-place. 
+ * + * @param s string to truncate + * @param new_len new length of the string. If new_len is >= the original + * length, this function does nothing. + */ +lsm_error lsm_str_truncate(lsm_str *s, uint64_t new_len); + +/** + * Split s at the specified index, saving the second half the string in s2. + * + * @param s string to split + * @param s2 string to store second part of s + * @param index position to split string. If index is the length of s or + * greater, s2 will simply be an empty string. + */ +lsm_error lsm_str_split(lsm_str *s, lsm_str *s2, uint64_t index); + #endif diff --git a/lsm/src/_include/lsm/str_internal.h b/lsm/src/_include/lsm/str_internal.h index 909a0df..03f5395 100644 --- a/lsm/src/_include/lsm/str_internal.h +++ b/lsm/src/_include/lsm/str_internal.h @@ -8,7 +8,7 @@ struct lsm_str { uint64_t len; union { - void *ptr; + char *ptr; char val[8]; } data; }; diff --git a/lsm/src/_include/lsm/trie_internal.h b/lsm/src/_include/lsm/trie_internal.h index 4fb7037..e3526d9 100644 --- a/lsm/src/_include/lsm/trie_internal.h +++ b/lsm/src/_include/lsm/trie_internal.h @@ -5,10 +5,32 @@ #include "lsm/str_internal.h" #include "lsm/trie.h" +/** + * A node inside a trie structure + */ typedef struct lsm_trie_node { lsm_bt bt; lsm_str skip; - char c; + void *data; } lsm_trie_node; +/** + * Allocate and initialize a new trie node + * + * @param ptr pointer to store new node pointer + */ +lsm_error lsm_trie_node_init(lsm_trie_node **ptr); + +/** + * Deallocate a trie node + * + * @param node node to deallocate + */ +void lsm_trie_node_free(lsm_trie_node *node); + +struct lsm_trie { + lsm_trie_node *root; + uint64_t size; +}; + #endif diff --git a/lsm/src/bt/lsm_bt.c b/lsm/src/bt/lsm_bt.c index da08cbd..d5b2895 100644 --- a/lsm/src/bt/lsm_bt.c +++ b/lsm/src/bt/lsm_bt.c @@ -130,3 +130,20 @@ lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key) { return lsm_error_ok; } + +lsm_error lsm_bt_replace(void **out, lsm_bt *bt, char key, void *data) { + lsm_bt_node *node 
= bt->root; + + while ((node != NULL) && (node->key != key)) { + node = key < node->key ? node->left : node->right; + } + + if (node == NULL) { + return lsm_error_not_found; + } + + *out = node->data; + node->data = data; + + return lsm_error_ok; +} diff --git a/lsm/src/str/lsm_str.c b/lsm/src/str/lsm_str.c index 38bce13..0e4e75b 100644 --- a/lsm/src/str/lsm_str.c +++ b/lsm/src/str/lsm_str.c @@ -5,6 +5,8 @@ #include "lsm.h" #include "lsm/str_internal.h" +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) + lsm_error lsm_str_init_zero(lsm_str **ptr) { lsm_str *str = calloc(1, sizeof(lsm_str)); @@ -59,3 +61,100 @@ void lsm_str_free(lsm_str *str) { } uint64_t lsm_str_len(lsm_str *str) { return str->len; } + +const char *lsm_str_ptr(lsm_str *str) { + if (str->len <= 8) { + return str->data.val; + } else { + return str->data.ptr; + } +} + +char lsm_str_char(lsm_str *str, uint64_t index) { + if (str->len <= 8) { + return str->data.val[index]; + } else { + return str->data.ptr[index]; + } +} + +lsm_error lsm_str_substr(lsm_str *out, lsm_str *str, uint64_t start, + uint64_t end) { + // A substring that starts past the string's length will have length 0 + uint64_t len = start < str->len ? 
end - start : 0; + const char *str_ptr = lsm_str_ptr(str); + + if (len <= 8) { + lsm_str_zero(out); + memcpy(out->data.val, &str_ptr[start], len); + } else { + char *buf = malloc(len * sizeof(char)); + + if (buf == NULL) { + return lsm_error_failed_alloc; + } + + memcpy(buf, &str_ptr[start], len); + + lsm_str_zero(out); + out->data.ptr = buf; + } + + out->len = len; + + return lsm_error_ok; +} + +uint64_t lsm_str_cmp(lsm_str *s1, uint64_t s1_offset, lsm_str *s2, + uint64_t s2_offset) { + uint64_t index = 0; + uint64_t max_len = MIN(s1->len - s1_offset, s2->len - s2_offset); + + while ((index < max_len) && (lsm_str_char(s1, s1_offset + index) == + lsm_str_char(s2, s2_offset + index))) { + index++; + } + + return index; +} + +lsm_error lsm_str_truncate(lsm_str *s, uint64_t new_len) { + if (new_len >= s->len) { + return lsm_error_ok; + } + + if (new_len <= 8) { + char *s_buf = s->data.ptr; + + memcpy(s->data.val, lsm_str_ptr(s), new_len); + + if (s->len > 8) { + free(s_buf); + } + } else { + char *buf = malloc(new_len * sizeof(char)); + + if (buf == NULL) { + return lsm_error_failed_alloc; + } + + memcpy(buf, s->data.ptr, new_len); + free(s->data.ptr); + + s->data.ptr = buf; + } + + s->len = new_len; + + return lsm_error_ok; +} + +lsm_error lsm_str_split(lsm_str *s, lsm_str *s2, uint64_t index) { + lsm_error res = lsm_str_substr(s2, s, index, s->len); + + if (res != lsm_error_ok) { + return res; + } + + return lsm_str_truncate(s, index); +} diff --git a/lsm/src/trie/lsm_trie.c b/lsm/src/trie/lsm_trie.c index 568decb..c7708df 100644 --- a/lsm/src/trie/lsm_trie.c +++ b/lsm/src/trie/lsm_trie.c @@ -1 +1,128 @@ +#include + +#include "lsm.h" #include "lsm/trie_internal.h" + +lsm_error lsm_trie_node_init(lsm_trie_node **ptr) { + lsm_trie_node *node = calloc(1, sizeof(lsm_trie_node)); + + if (node == NULL) { + return lsm_error_failed_alloc; + } + + *ptr = node; + + return lsm_error_ok; +} + +lsm_error lsm_trie_init(lsm_trie **ptr) { + lsm_trie *trie = calloc(1, 
sizeof(lsm_trie)); + + if (trie == NULL) { + return lsm_error_failed_alloc; + } + + lsm_trie_node *root; + lsm_error res = lsm_trie_node_init(&root); + + if (res != lsm_error_ok) { + return res; + } + + trie->root = root; + *ptr = trie; + + return lsm_error_ok; +} + +lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { + // NULL is not allowed as a data value, as it's used to indicate a lack of + // data + if (data == NULL) { + return lsm_error_null_value; + } + + uint64_t key_len = lsm_str_len(key); + + // Empty string is represented by the root + if (key_len == 0) { + if (trie->root->data == NULL) { + trie->root->data = data; + + return lsm_error_ok; + } else { + return lsm_error_already_present; + } + } + + uint64_t index = 0; + lsm_trie_node *node = trie->root; + lsm_trie_node *next_node; + lsm_error res; + + while (index < key_len) { + char c = lsm_str_char(key, index); + res = lsm_bt_search((void **)&next_node, &node->bt, c); + + // No child is present yet for this character, so we can insert the string + // here + if (res == lsm_error_not_found) { + lsm_trie_node *new_node; + res = lsm_trie_node_init(&new_node); + + if (res != lsm_error_ok) { + return res; + } + + new_node->data = data; + lsm_str_substr(&new_node->skip, key, index + 1, key_len); + + return lsm_bt_insert(&node->bt, c, new_node); + } + + index++; + + // We compare the remaining part of the key with the node's skip. If cmp is + // less than the length of the skip, we know they differ and the edge should + // be split. 
+ uint64_t cmp = lsm_str_cmp(key, index, &next_node->skip, 0); + + if (cmp < lsm_str_len(&next_node->skip)) { + lsm_trie_node *split_node; + res = lsm_trie_node_init(&split_node); + + if (res != lsm_error_ok) { + return res; + } + + // split_node replaces the original node as the new child node + lsm_trie_node *bottom_node; + lsm_bt_replace((void **)&bottom_node, &node->bt, c, split_node); + + // The old child node now becomes the child of split_node + lsm_bt_insert(&split_node->bt, lsm_str_char(key, index + cmp), + bottom_node); + + // The new node splits the edge into two parts, so the new node will have + // the remaining part of the skip (minus the one character) as its skip + lsm_str_substr(&split_node->skip, &next_node->skip, cmp + 1, + lsm_str_len(&next_node->skip)); + + // The old node keeps the first part of the skip + lsm_str_truncate(&next_node->skip, cmp); + + next_node = split_node; + } + + node = next_node; + index += cmp; + } + + if (node->data != NULL) { + return lsm_error_already_present; + } + + node->data = data; + + return lsm_error_ok; +} From 87000e8f73c7633a3edc26124f796774771cc06f Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Fri, 13 Oct 2023 22:08:06 +0200 Subject: [PATCH 07/70] feat(lsm): added trie search --- lsm/include/lsm/str.h | 9 ++++++++ lsm/src/str/lsm_str.c | 27 ++++++++++++++++++++++ lsm/src/trie/lsm_trie.c | 50 +++++++++++++++++++++++++++++++++++++++++ lsm/test/bt/bt.c | 12 +++++----- lsm/test/trie/trie.c | 32 ++++++++++++++++++++++++++ 5 files changed, 124 insertions(+), 6 deletions(-) create mode 100644 lsm/test/trie/trie.c diff --git a/lsm/include/lsm/str.h b/lsm/include/lsm/str.h index b790a32..346fd54 100644 --- a/lsm/include/lsm/str.h +++ b/lsm/include/lsm/str.h @@ -34,6 +34,15 @@ void lsm_str_init_prealloc(lsm_str *str, char *s); */ lsm_error lsm_str_init(lsm_str **ptr, char *s); +/** + * Same as lsm_str_init, except it copies the original string instead of taking + * over ownership, leaving the original string 
untouched. + * + * @param ptr pointer to store newly allocated pointer + * @param s string to copy into lsm string + */ +lsm_error lsm_str_init_copy(lsm_str **ptr, char *s); + /** * Deallocate the existing internal string if needed and replace the lsm_str * with a string of length 0, wiping its contents. diff --git a/lsm/src/str/lsm_str.c b/lsm/src/str/lsm_str.c index 0e4e75b..a33c700 100644 --- a/lsm/src/str/lsm_str.c +++ b/lsm/src/str/lsm_str.c @@ -44,6 +44,33 @@ lsm_error lsm_str_init(lsm_str **ptr, char *s) { return lsm_error_ok; } +lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) { + lsm_str *str = calloc(1, sizeof(lsm_str)); + + if (str == NULL) { + return lsm_error_failed_alloc; + } + + str->len = strlen(s); + + if (str->len <= 8) { + memcpy(str->data.val, s, str->len); + } else { + char *buf = malloc(str->len * sizeof(char)); + + if (buf == NULL) { + return lsm_error_failed_alloc; + } + + memcpy(buf, s, str->len); + str->data.ptr = buf; + } + + *ptr = str; + + return lsm_error_ok; +} + void lsm_str_zero(lsm_str *str) { if (str->len > 8) { free(str->data.ptr); diff --git a/lsm/src/trie/lsm_trie.c b/lsm/src/trie/lsm_trie.c index c7708df..541f89f 100644 --- a/lsm/src/trie/lsm_trie.c +++ b/lsm/src/trie/lsm_trie.c @@ -118,6 +118,8 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { index += cmp; } + // This catches the edge case where the exact node for the string is already + // present in the trie if (node->data != NULL) { return lsm_error_already_present; } @@ -126,3 +128,51 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { return lsm_error_ok; } + +lsm_error lsm_trie_search(void **data, lsm_trie *trie, lsm_str *key) { + uint64_t key_len = lsm_str_len(key); + + if (key_len == 0) { + if (trie->root->data != NULL) { + *data = trie->root->data; + + return lsm_error_ok; + } else { + return lsm_error_not_found; + } + } + + uint64_t index = 0; + lsm_trie_node *node = trie->root; + lsm_trie_node *next_node; + lsm_error 
res; + + while (index < key_len) { + char c = lsm_str_char(key, index); + res = lsm_bt_search((void **)&next_node, &node->bt, c); + + if (res != lsm_error_ok) { + return res; + } + + index++; + + uint64_t cmp = lsm_str_cmp(key, index, &next_node->skip, 0); + + // If we end in the middle of an edge, we definitely haven't found the node + if (cmp != lsm_str_len(&next_node->skip)) { + return lsm_error_not_found; + } + + node = next_node; + index += cmp; + } + + if (node->data == NULL) { + return lsm_error_not_found; + } + + *data = node->data; + + return lsm_error_ok; +} diff --git a/lsm/test/bt/bt.c b/lsm/test/bt/bt.c index 1900305..f96cf99 100644 --- a/lsm/test/bt/bt.c +++ b/lsm/test/bt/bt.c @@ -99,11 +99,11 @@ void test_remove_multiple() { } TEST_LIST = { - { "test init", test_init }, - { "test insert first", test_insert_first }, - { "test insert two", test_insert_two }, - { "test insert multiple", test_insert_multiple }, - { "test remove root", test_remove_root }, - { "test remove multiple", test_remove_multiple }, + { "bt init", test_init }, + { "bt insert first", test_insert_first }, + { "bt insert two", test_insert_two }, + { "bt insert multiple", test_insert_multiple }, + { "bt remove root", test_remove_root }, + { "bt remove multiple", test_remove_multiple }, { NULL, NULL } }; diff --git a/lsm/test/trie/trie.c b/lsm/test/trie/trie.c new file mode 100644 index 0000000..f3bf73b --- /dev/null +++ b/lsm/test/trie/trie.c @@ -0,0 +1,32 @@ +#include "lsm.h" +#include "test.h" +#include "lsm/trie_internal.h" + +#define TRIE_INIT() \ + lsm_trie *trie; \ + TEST_CHECK(lsm_trie_init(&trie) == lsm_error_ok); \ + TEST_CHECK(trie != NULL) + +void test_init() { + TRIE_INIT(); + /* lsm_trie_free(trie); */ +} + +void test_insert_one() { + TRIE_INIT(); + + lsm_str *s; + lsm_str_init_copy(&s, "hello"); + TEST_CHECK(lsm_trie_insert(trie, s, (void *)1) == lsm_error_ok); + TEST_CHECK(lsm_trie_insert(trie, s, (void *)1) == lsm_error_already_present); + + void *data; + 
TEST_CHECK(lsm_trie_search(&data, trie, s) == lsm_error_ok); + TEST_CHECK(data == (void *)1); +} + +TEST_LIST = { + { "trie init", test_init }, + { "trie insert one", test_insert_one }, + { NULL, NULL } +}; From ef8129b8ebec0d7e09c178ed9cf74012bbadbb3b Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sat, 14 Oct 2023 14:33:22 +0200 Subject: [PATCH 08/70] feat(lsm): write str tests; start trie tests --- lsm/include/lsm/str.h | 66 +++++++---- lsm/src/str/lsm_str.c | 71 +++++++----- lsm/src/trie/lsm_trie.c | 6 + lsm/test/str/str.c | 91 +++++++++++++++ lsm/test/trie/fuzzy.h | 222 +++++++++++++++++++++++++++++++++++++ lsm/test/trie/trie.c | 2 +- lsm/test/trie/trie_fuzzy.c | 35 ++++++ 7 files changed, 441 insertions(+), 52 deletions(-) create mode 100644 lsm/test/str/str.c create mode 100644 lsm/test/trie/fuzzy.h create mode 100644 lsm/test/trie/trie_fuzzy.c diff --git a/lsm/include/lsm/str.h b/lsm/include/lsm/str.h index 346fd54..52659c1 100644 --- a/lsm/include/lsm/str.h +++ b/lsm/include/lsm/str.h @@ -1,6 +1,8 @@ #ifndef LSM_STR #define LSM_STR +#include + #include "lsm.h" /** @@ -10,22 +12,6 @@ */ typedef struct lsm_str lsm_str; -/** - * Allocate a new string struct of length 0. - * - * @param ptr pointer to store newly allocated pointer in - */ -lsm_error lsm_str_init_zero(lsm_str **ptr); - -/** - * Update an existing lsm_str so it now represents the new provided string. The - * string pointer of the original object is free'd if needed. - * - * @param str lsm_str object to modify - * @param s string to convert into lsm string; ownership is taken over - */ -void lsm_str_init_prealloc(lsm_str *str, char *s); - /** * Allocate and initialize a new lsm_str object * @@ -35,17 +21,45 @@ void lsm_str_init_prealloc(lsm_str *str, char *s); lsm_error lsm_str_init(lsm_str **ptr, char *s); /** - * Same as lsm_str_init, except it copies the original string instead of taking - * over ownership, leaving the original string untouched. + * Allocate a new string struct of length 0. 
+ * + * @param ptr pointer to store newly allocated pointer in + */ +lsm_error lsm_str_init_zero(lsm_str **ptr); + +/** + * Allocate and initialize a new lsm_str object, but copy the original string + * instead of taking over ownership, leaving the original string untouched. * * @param ptr pointer to store newly allocated pointer * @param s string to copy into lsm string */ lsm_error lsm_str_init_copy(lsm_str **ptr, char *s); +/** + * Overwrite an existing lsm_str so it now represents the new provided string. + * The string pointer of the original object is free'd if needed. Ownership of + * the pointer is taken over. + * + * @param str lsm_str object to modify + * @param s string to convert into lsm string; ownership is taken over + */ +void lsm_str_overwrite(lsm_str *str, char *s); + +/** + * Overwrite an existing lsm_str so it now represents the new provided string. + * The string pointer of the original object is free'd if needed. The provided + * string is copied, leaving the original untouched. + * + * @param str lsm_str object to modify + * @param s string to convert into lsm string; ownership is taken over + */ +lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s); + /** * Deallocate the existing internal string if needed and replace the lsm_str - * with a string of length 0, wiping its contents. + * with a string of length 0, wiping its contents. This function can be used as + * a substitute for lsm_str_free for stack-allocated structs. * * @param str string to wipe */ @@ -85,7 +99,8 @@ char lsm_str_char(lsm_str *str, uint64_t index); * Take a substring and copy it to a provided string object. * * @param out string to store new substring in. The contents of this string will - * be replaced. + * be replaced. This string is assumed to be unitialized, so zero this string + * manually if you're overwriting an existing string. * @param str string to take substring from * @param start inclusive start index for the substring. 
If this is greater than * or equal to the string's length, out will be a zero-length string. @@ -109,7 +124,16 @@ uint64_t lsm_str_cmp(lsm_str *s1, uint64_t s1_offset, lsm_str *s2, uint64_t s2_offset); /** - * Truncate a string in-place. + * Checks whether the two strings are identical. + * + * @param s1 first string to compare + * @param s2 second string to compare + * @return true if their values are equal, false otherwise + */ +bool lsm_str_eq(lsm_str *s1, lsm_str *s2); + +/** + * Truncate an already initialized string in-place. * * @param s string to truncate * @param new_len new length of the string. If new_len is >= the original diff --git a/lsm/src/str/lsm_str.c b/lsm/src/str/lsm_str.c index a33c700..2244e52 100644 --- a/lsm/src/str/lsm_str.c +++ b/lsm/src/str/lsm_str.c @@ -7,6 +7,20 @@ #define MIN(x, y) (((x) < (y)) ? (x) : (y)) +lsm_error lsm_str_init(lsm_str **ptr, char *s) { + lsm_str *str = calloc(1, sizeof(lsm_str)); + + if (str == NULL) { + return lsm_error_failed_alloc; + } + + lsm_str_overwrite(str, s); + + *ptr = str; + + return lsm_error_ok; +} + lsm_error lsm_str_init_zero(lsm_str **ptr) { lsm_str *str = calloc(1, sizeof(lsm_str)); @@ -19,7 +33,21 @@ lsm_error lsm_str_init_zero(lsm_str **ptr) { return lsm_error_ok; } -void lsm_str_init_prealloc(lsm_str *str, char *s) { +lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) { + lsm_str *str = calloc(1, sizeof(lsm_str)); + + if (str == NULL) { + return lsm_error_failed_alloc; + } + + lsm_str_overwrite_copy(str, s); + + *ptr = str; + + return lsm_error_ok; +} + +void lsm_str_overwrite(lsm_str *str, char *s) { str->len = strlen(s); if (str->len <= 8) { @@ -30,27 +58,7 @@ void lsm_str_init_prealloc(lsm_str *str, char *s) { } } -lsm_error lsm_str_init(lsm_str **ptr, char *s) { - lsm_str *str = calloc(1, sizeof(lsm_str)); - - if (str == NULL) { - return lsm_error_failed_alloc; - } - - lsm_str_init_prealloc(str, s); - - *ptr = str; - - return lsm_error_ok; -} - -lsm_error lsm_str_init_copy(lsm_str **ptr, 
char *s) { - lsm_str *str = calloc(1, sizeof(lsm_str)); - - if (str == NULL) { - return lsm_error_failed_alloc; - } - +lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s) { str->len = strlen(s); if (str->len <= 8) { @@ -66,8 +74,6 @@ lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) { str->data.ptr = buf; } - *ptr = str; - return lsm_error_ok; } @@ -80,10 +86,7 @@ void lsm_str_zero(lsm_str *str) { } void lsm_str_free(lsm_str *str) { - if (str->len > 8) { - free(str->data.ptr); - } - + lsm_str_zero(str); free(str); } @@ -112,7 +115,7 @@ lsm_error lsm_str_substr(lsm_str *out, lsm_str *str, uint64_t start, const char *str_ptr = lsm_str_ptr(str); if (len <= 8) { - lsm_str_zero(out); + /* lsm_str_zero(out); */ memcpy(out->data.val, &str_ptr[start], len); } else { char *buf = malloc(len * sizeof(char)); @@ -123,7 +126,7 @@ lsm_error lsm_str_substr(lsm_str *out, lsm_str *str, uint64_t start, memcpy(buf, &str_ptr[start], len); - lsm_str_zero(out); + /* lsm_str_zero(out); */ out->data.ptr = buf; } @@ -185,3 +188,11 @@ lsm_error lsm_str_split(lsm_str *s, lsm_str *s2, uint64_t index) { return lsm_str_truncate(s, index); } + +bool lsm_str_eq(lsm_str *s1, lsm_str *s2) { + if (s1->len != s2->len) { + return false; + } + + return memcmp(lsm_str_ptr(s1), lsm_str_ptr(s2), s1->len) == 0; +} diff --git a/lsm/src/trie/lsm_trie.c b/lsm/src/trie/lsm_trie.c index 541f89f..e72c288 100644 --- a/lsm/src/trie/lsm_trie.c +++ b/lsm/src/trie/lsm_trie.c @@ -35,6 +35,8 @@ lsm_error lsm_trie_init(lsm_trie **ptr) { return lsm_error_ok; } +uint64_t lsm_trie_size(lsm_trie *trie) { return trie->size; } + lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { // NULL is not allowed as a data value, as it's used to indicate a lack of // data @@ -48,6 +50,7 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { if (key_len == 0) { if (trie->root->data == NULL) { trie->root->data = data; + trie->size++; return lsm_error_ok; } else { @@ -75,6 +78,8 @@ lsm_error 
lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { } new_node->data = data; + trie->size++; + lsm_str_substr(&new_node->skip, key, index + 1, key_len); return lsm_bt_insert(&node->bt, c, new_node); @@ -125,6 +130,7 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { } node->data = data; + trie->size++; return lsm_error_ok; } diff --git a/lsm/test/str/str.c b/lsm/test/str/str.c new file mode 100644 index 0000000..4cbd2d1 --- /dev/null +++ b/lsm/test/str/str.c @@ -0,0 +1,91 @@ +#include "test.h" +#include "lsm/str_internal.h" + +void test_cmp() { + lsm_str s1, s2, s3; + lsm_str_overwrite_copy(&s1, "some_string"); + lsm_str_overwrite_copy(&s2, "some"); + lsm_str_overwrite_copy(&s3, "some_string_extra"); + + TEST_CHECK(lsm_str_cmp(&s1, 0, &s2, 0) == 4); + TEST_CHECK(lsm_str_cmp(&s1, 0, &s2, 1) == 0); + TEST_CHECK(lsm_str_cmp(&s1, 1, &s2, 1) == 3); + TEST_CHECK(lsm_str_cmp(&s1, 1, &s2, 0) == 0); + + TEST_CHECK(lsm_str_cmp(&s1, 0, &s3, 0) == lsm_str_len(&s1)); +} + +void test_eq() { + lsm_str s1, s2; + lsm_str_overwrite_copy(&s1, "longerthan8"); + lsm_str_overwrite_copy(&s2, "longerthan8"); + + TEST_CHECK(lsm_str_eq(&s1, &s2)); + + lsm_str_overwrite_copy(&s1, "longerthan8"); + lsm_str_overwrite_copy(&s2, "lmaolongerthan8"); + + TEST_CHECK(!lsm_str_eq(&s1, &s2)); + + lsm_str_overwrite_copy(&s1, "short"); + lsm_str_overwrite_copy(&s2, "short"); + + TEST_CHECK(lsm_str_eq(&s1, &s2)); + + lsm_str_overwrite_copy(&s1, "short"); + lsm_str_overwrite_copy(&s1, "shorte"); + + TEST_CHECK(!lsm_str_eq(&s1, &s2)); + + lsm_str_overwrite_copy(&s1, "longerthan8"); + lsm_str_overwrite_copy(&s2, "short"); + + TEST_CHECK(!lsm_str_eq(&s1, &s2)); +} + +void test_substr() { + lsm_str s1, s2, s3; + lsm_str_overwrite_copy(&s1, "some_string"); + lsm_str_overwrite_copy(&s3, "string"); + lsm_str_substr(&s2, &s1, 5, lsm_str_len(&s1)); + + TEST_CHECK(lsm_str_eq(&s2, &s3)); + + lsm_str_zero(&s2); + lsm_str_substr(&s2, &s1, 25, lsm_str_len(&s1)); + + 
TEST_CHECK(lsm_str_len(&s2) == 0); +} + +void test_truncate() { + lsm_str s1, s2, s3; + lsm_str_overwrite_copy(&s1, "some_longer_string_thing"); + lsm_str_overwrite_copy(&s2, "some_longer_string"); + lsm_str_overwrite_copy(&s3, "some"); + + lsm_str_truncate(&s1, 18); + TEST_CHECK(lsm_str_eq(&s1, &s2)); + + lsm_str_truncate(&s1, 4); + TEST_CHECK(lsm_str_eq(&s1, &s3)); +} + +void test_init_copy() { + char orig[] = "some_string"; + lsm_str *s; + lsm_str_init_copy(&s, orig); + + TEST_CHECK(s->data.ptr != orig); + TEST_CHECK(strcmp(s->data.ptr, orig) == 0); + + lsm_str_free(s); +} + +TEST_LIST = { + { "str init_copy", test_init_copy }, + { "str cmp", test_cmp }, + { "str eq", test_eq }, + { "str substr", test_substr }, + { "str truncate", test_truncate }, + { NULL, NULL } +}; diff --git a/lsm/test/trie/fuzzy.h b/lsm/test/trie/fuzzy.h new file mode 100644 index 0000000..6cd0a07 --- /dev/null +++ b/lsm/test/trie/fuzzy.h @@ -0,0 +1,222 @@ +#ifndef LSM_TRIE_FUZZY_TEST +#define LSM_TRIE_FUZZY_TEST + +#include +#include +#include +#include + +#include "lsm/trie.h" +#include "lsm/str_internal.h" + +typedef struct fuzzyconfig { + int seed; + int word_length; + int word_count; +} FuzzyConfig; + +void random_clean_string(char* s, int len) { + char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,?"; + int charset_len = strlen(charset); + + // len - 1 ensures that we can still set the null byte for the final byte + int actual_len = rand() % (len - 1); + int key; + int i; + + for (i = 0; i < actual_len; i++) { + key = rand() % charset_len; + s[i] = charset[key]; + } + + s[i] = '\0'; +} + +void random_string(char* s, int len) { + int val = rand(); + + // String can't be an empty string as they aren't supported + s[0] = (char)(val % 255 + 1); + + for (int i = 1; i < len - 1; i++) { + val = rand(); + s[i] = (char)(val % 255 + 1); + } + + // Just in case no null characters were created + s[len - 1] = '\0'; +} + +void random_string_matrix(char** s, int count, 
int len) { + for (int i = 0; i < count; i++) { + random_string(s[i], len); + } +} + +char** init_string_matrix(int count, int len) { + char** matrix = malloc(count * sizeof(char*)); + + for (int i = 0; i < count; i++) { + matrix[i] = calloc(len, sizeof(char)); + } + + return matrix; +} + +lsm_str *lsm_random_string_matrix(int count, int max_len) { + lsm_str *matrix = calloc(count, sizeof(lsm_str)); + + for (int i = 0; i < count; i++) { + int len = rand() % max_len; + char *buf = malloc(len * sizeof(char)); + + for (int i = 0; i < len; i++) { + buf[i] = (char)(rand() % 255 + 1); + } + + lsm_str_overwrite(&matrix[i], buf); + } + + return matrix; +} + +/** + * Test a given trie implementation using randomly generated strings generated + * using a given seed. + * + * @param seed seed to use for generating random strings + * @param count how many strings to test with + * @param len maximum length of each string + * @param init_func function to creat a new trie of the wanted type + * @param free_func function to free the given trie + * @param add_func function to add a string to the given trie + * @param remove_func function to remove a string from the given trie + * @param size_func function to get the size of the given trie + * @return exit code describing failures, if any + */ +int fuzzy_test_trie_seed(FuzzyConfig conf) { + srand(conf.seed); + + lsm_str *matrix = lsm_random_string_matrix(conf.word_count, conf.word_length); + bool* contains = calloc(conf.word_count, sizeof(bool)); + + // It's possible that the string matrix contains duplicate strings + bool** contains_dedupped = calloc(conf.word_count, sizeof(bool*)); + + for (int i = 0; i < conf.word_count; i++) { + if (contains_dedupped[i] == NULL) { + contains_dedupped[i] = &contains[i]; + + for (int j = i + 1; j < conf.word_count; j++) { + if (lsm_str_eq(&matrix[i], &matrix[j])) { + contains_dedupped[j] = &contains[i]; + } + } + } + } + + // We keep track of the size as well so that we can check whether this is + 
// also correct + size_t size = 0; + + lsm_trie *trie; + lsm_trie_init(&trie); + + bool changed; + lsm_error status; + + // 0: success + // 1: invalid add + // 2: invalid remove + // 3: bad size after adds + // 4: bad size after removes + int exit_code = 0; + + // Add all strings to trie, checking for duplicates + for (int i = 0; i < conf.word_count; i++) { + status = lsm_trie_insert(trie, &matrix[i], (void **)1); + + // if changed is false, *contains_dedupped[i] should be true, as changed + // can only be false if the string is already contained in the trie. if + // changed is true, *contains_dedupped[i] should be false, as the string + // cannot be in the trie yet. + if (status == lsm_error_ok && *contains_dedupped[i]) { + exit_code = 1; + goto END; + } + + if (!*contains_dedupped[i]) { + *contains_dedupped[i] = true; + size++; + } + } + + // Ensure size is correct + if (lsm_trie_size(trie) != size) { + printf("%lu %lu\n", lsm_trie_size(trie), size); + exit_code = 3; + goto END; + } + + // Remove all strings again, again taking duplicates into consideration + /* for (int i = 0; i < conf.word_count; i++) { */ + /* changed = remove_func(ct, matrix[i]); */ + + /* // The string shouldn't be in the trie, yet another add operation */ + /* // says it added it as well */ + /* if (changed != *contains_dedupped[i]) { */ + /* exit_code = 2; */ + /* goto END; */ + /* } */ + + /* if (*contains_dedupped[i]) { */ + /* *contains_dedupped[i] = false; */ + /* size--; */ + /* } */ + /* } */ + + // Finally, check that the trie is completely empty + /* if (size_func(ct) != 0) { */ + /* exit_code = 4; */ + /* } */ + +END: + /* trie_free(ct); */ + + // Even testing functions should properly free memory + free(contains); + free(contains_dedupped); + + for (int i = 0; i < conf.word_count; i++) { + lsm_str_zero(&matrix[i]); + } + + free(matrix); + + return exit_code; +} + +/** + * Same as fuzzy_test_trie_seed, except that the seed is randomly generated. 
+ * + * @param count how many strings to test with + * @param len maximum length of each string + * @param init_func function to creat a new trie of the wanted type + * @param free_func function to free the given trie + * @param add_func function to add a string to the given trie + * @param remove_func function to remove a string from the given trie + * @param size_func function to get the size of the given trie + * @return the generated seed if the test wasn't successful, -1 otherwise. + */ +/* int fuzzy_test_trie(int count, int len, void* (*init_func) (), void (*free_func) (void*), bool (*add_func) (void*, char*), bool (*remove_func) (void*, char*), int (*size_func) (void*)) { */ +/* int seed = rand(); */ +/* bool succeeded = fuzzy_test_trie_seed(seed, count, len, init_func, free_func, add_func, remove_func, size_func); */ + +/* if (!succeeded) { */ +/* return seed; */ +/* } */ + +/* return -1; */ +/* } */ + +#endif diff --git a/lsm/test/trie/trie.c b/lsm/test/trie/trie.c index f3bf73b..db3e6ee 100644 --- a/lsm/test/trie/trie.c +++ b/lsm/test/trie/trie.c @@ -1,5 +1,5 @@ -#include "lsm.h" #include "test.h" +#include "lsm.h" #include "lsm/trie_internal.h" #define TRIE_INIT() \ diff --git a/lsm/test/trie/trie_fuzzy.c b/lsm/test/trie/trie_fuzzy.c new file mode 100644 index 0000000..9974a68 --- /dev/null +++ b/lsm/test/trie/trie_fuzzy.c @@ -0,0 +1,35 @@ +#include "test.h" +#include "lsm.h" +#include "lsm/trie_internal.h" +#include "fuzzy.h" + +void test_fuzzy() { + // Randomize seed + srand(time(NULL)); + + FuzzyConfig config; + int counter = 0; + int res; + + for (int len = 1; len < 25; len += 5) { + for (int count = 10; count <= 500; count += 10) { + for (int i = 0; i < 1; i++) { + counter++; + + config.seed = rand(); + config.word_length = len; + config.word_count = count; + + res = fuzzy_test_trie_seed(config); + TEST_CHECK_(res == 0, + "Failed config, seed = %i, len = %i, count = %i, code = %i", config.seed, config.word_length, config.word_count, res); + } + } + 
} + TEST_MSG("fuzzy tests done = %i", counter); +} + +TEST_LIST = { + /* { "trie fuzzy", test_fuzzy }, */ + { NULL, NULL} +}; From 682f422e3ce7c64c0dbc362a113ec69543efb58e Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sat, 14 Oct 2023 14:53:42 +0200 Subject: [PATCH 09/70] feat(lsm): probably working trie insert --- lsm/src/trie/lsm_trie.c | 23 +++++++++++++++-------- lsm/test/trie/trie_fuzzy.c | 4 ++-- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/lsm/src/trie/lsm_trie.c b/lsm/src/trie/lsm_trie.c index e72c288..a8b7d82 100644 --- a/lsm/src/trie/lsm_trie.c +++ b/lsm/src/trie/lsm_trie.c @@ -1,3 +1,4 @@ +#include #include #include "lsm.h" @@ -101,20 +102,26 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { } // split_node replaces the original node as the new child node + // bottom_node here is always the same value as next_node lsm_trie_node *bottom_node; lsm_bt_replace((void **)&bottom_node, &node->bt, c, split_node); - // The old child node now becomes the child of split_node - lsm_bt_insert(&split_node->bt, lsm_str_char(key, index + cmp), - bottom_node); + // The old next node now becomes the child of split_node + lsm_bt_insert(&split_node->bt, lsm_str_char(&next_node->skip, cmp), + next_node); - // The new node splits the edge into two parts, so the new node will have - // the remaining part of the skip (minus the one character) as its skip - lsm_str_substr(&split_node->skip, &next_node->skip, cmp + 1, - lsm_str_len(&next_node->skip)); + // split_node's skip has not been initialized yet, so we can simply + // overwrite it with bottom_node's skip + split_node->skip = next_node->skip; + + // The new node splits the edge into two parts, so the new split node will + // have the first part of the skip (minus the one character) as its + // skip + lsm_str_substr(&next_node->skip, &split_node->skip, cmp + 1, + lsm_str_len(&split_node->skip)); // The old node keeps the first part of the skip - 
lsm_str_truncate(&next_node->skip, cmp); + lsm_str_truncate(&split_node->skip, cmp); next_node = split_node; } diff --git a/lsm/test/trie/trie_fuzzy.c b/lsm/test/trie/trie_fuzzy.c index 9974a68..9462c27 100644 --- a/lsm/test/trie/trie_fuzzy.c +++ b/lsm/test/trie/trie_fuzzy.c @@ -13,7 +13,7 @@ void test_fuzzy() { for (int len = 1; len < 25; len += 5) { for (int count = 10; count <= 500; count += 10) { - for (int i = 0; i < 1; i++) { + for (int i = 0; i < 10; i++) { counter++; config.seed = rand(); @@ -30,6 +30,6 @@ void test_fuzzy() { } TEST_LIST = { - /* { "trie fuzzy", test_fuzzy }, */ + { "trie fuzzy", test_fuzzy }, { NULL, NULL} }; From 6938c29725bd6f078bba6c1eb9b7dc762c5ed153 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sat, 14 Oct 2023 15:57:33 +0200 Subject: [PATCH 10/70] feat(lsm): implement a simple trie remove --- lsm/include/lsm/bt.h | 10 +++++ lsm/src/bt/lsm_bt.c | 38 ++++++++++++------ lsm/src/trie/lsm_trie.c | 87 +++++++++++++++++++++++++++++++++++++++-- lsm/test/bt/bt.c | 6 +++ lsm/test/trie/fuzzy.h | 40 +++++++++---------- 5 files changed, 147 insertions(+), 34 deletions(-) diff --git a/lsm/include/lsm/bt.h b/lsm/include/lsm/bt.h index a0995a1..2e30ae5 100644 --- a/lsm/include/lsm/bt.h +++ b/lsm/include/lsm/bt.h @@ -20,6 +20,16 @@ lsm_error lsm_bt_init(lsm_bt **ptr); */ void lsm_bt_free(lsm_bt *bt); +/** + * Remove the binary tree's entire contents, but keep the struct allocated. + */ +void lsm_bt_clear(lsm_bt *bt); + +/** + * Return the size of the binary tree + */ +uint64_t lsm_bt_size(lsm_bt *bt); + /** * Search for the data stored behind the given key. 
* diff --git a/lsm/src/bt/lsm_bt.c b/lsm/src/bt/lsm_bt.c index d5b2895..69fa895 100644 --- a/lsm/src/bt/lsm_bt.c +++ b/lsm/src/bt/lsm_bt.c @@ -18,16 +18,16 @@ lsm_error lsm_bt_node_init(lsm_bt_node **ptr, const char key, void *data) { void lsm_bt_node_free(lsm_bt_node *node) { free(node); } -void lsm_bt_node_free_recursive(lsm_bt_node *node) { +void lsm_bt_node_free_tree(lsm_bt_node *node) { if (node->left != NULL) { - lsm_bt_node_free_recursive(node->left); + lsm_bt_node_free_tree(node->left); + lsm_bt_node_free(node->left); } if (node->right != NULL) { - lsm_bt_node_free_recursive(node->right); + lsm_bt_node_free_tree(node->right); + lsm_bt_node_free(node->right); } - - lsm_bt_node_free(node); } lsm_error lsm_bt_init(lsm_bt **ptr) { @@ -42,14 +42,23 @@ lsm_error lsm_bt_init(lsm_bt **ptr) { return lsm_error_ok; } -void lsm_bt_free(lsm_bt *bt) { +void lsm_bt_clear(lsm_bt *bt) { if (bt->root != NULL) { - lsm_bt_node_free_recursive(bt->root); - } + lsm_bt_node_free_tree(bt->root); + lsm_bt_node_free(bt->root); + bt->root = NULL; + bt->size = 0; + } +} + +void lsm_bt_free(lsm_bt *bt) { + lsm_bt_clear(bt); free(bt); } +uint64_t lsm_bt_size(lsm_bt *bt) { return bt->size; } + lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data) { lsm_bt_node **dest = &bt->root; @@ -85,7 +94,9 @@ lsm_error lsm_bt_search(void **out, lsm_bt *bt, char key) { return lsm_error_not_found; } - *out = node->data; + if (out != NULL) { + *out = node->data; + } return lsm_error_ok; } @@ -105,7 +116,9 @@ lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key) { return lsm_error_not_found; } - *out = (*dest)->data; + if (out != NULL) { + *out = (*dest)->data; + } bt->size--; if (((*dest)->left != NULL) && ((*dest)->right != NULL)) { @@ -142,7 +155,10 @@ lsm_error lsm_bt_replace(void **out, lsm_bt *bt, char key, void *data) { return lsm_error_not_found; } - *out = node->data; + if (out != NULL) { + *out = node->data; + } + node->data = data; return lsm_error_ok; diff --git 
a/lsm/src/trie/lsm_trie.c b/lsm/src/trie/lsm_trie.c index a8b7d82..b6e190a 100644 --- a/lsm/src/trie/lsm_trie.c +++ b/lsm/src/trie/lsm_trie.c @@ -16,6 +16,12 @@ lsm_error lsm_trie_node_init(lsm_trie_node **ptr) { return lsm_error_ok; } +void lsm_trie_node_free(lsm_trie_node *node) { + lsm_bt_clear(&node->bt); + lsm_str_zero(&node->skip); + free(node); +} + lsm_error lsm_trie_init(lsm_trie **ptr) { lsm_trie *trie = calloc(1, sizeof(lsm_trie)); @@ -142,12 +148,14 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { return lsm_error_ok; } -lsm_error lsm_trie_search(void **data, lsm_trie *trie, lsm_str *key) { +lsm_error lsm_trie_search(void **out, lsm_trie *trie, lsm_str *key) { uint64_t key_len = lsm_str_len(key); if (key_len == 0) { if (trie->root->data != NULL) { - *data = trie->root->data; + if (out != NULL) { + *out = trie->root->data; + } return lsm_error_ok; } else { @@ -185,7 +193,80 @@ lsm_error lsm_trie_search(void **data, lsm_trie *trie, lsm_str *key) { return lsm_error_not_found; } - *data = node->data; + if (out != NULL) { + *out = node->data; + } + + return lsm_error_ok; +} + +lsm_error lsm_trie_remove(void **data, lsm_trie *trie, lsm_str *key) { + uint64_t key_len = lsm_str_len(key); + + if (key_len == 0) { + if (trie->root->data != NULL) { + if (data != NULL) { + *data = trie->root->data; + } + + trie->root->data = NULL; + trie->size--; + + return lsm_error_ok; + } else { + return lsm_error_not_found; + } + } + + uint64_t index = 0; + lsm_trie_node *parent = trie->root; + lsm_trie_node *child; + lsm_error res; + char c; + + while (index < key_len) { + c = lsm_str_char(key, index); + res = lsm_bt_search((void **)&child, &parent->bt, c); + + if (res != lsm_error_ok) { + return res; + } + + index++; + + uint64_t cmp = lsm_str_cmp(key, index, &child->skip, 0); + + // If we end in the middle of an edge, we definitely haven't found the node + if (cmp != lsm_str_len(&child->skip)) { + return lsm_error_not_found; + } + + index += cmp; + + 
// This context is needed for the removal + if (index < key_len) { + parent = child; + } + } + + if (child->data == NULL) { + return lsm_error_not_found; + } + + // Child is the node we wish to delete + if (data != NULL) { + *data = child->data; + } + + child->data = NULL; + + // We only remove child if it has no children of its own + if (lsm_bt_size(&child->bt) == 0) { + lsm_bt_remove(NULL, &parent->bt, c); + lsm_trie_node_free(child); + } + + trie->size--; return lsm_error_ok; } diff --git a/lsm/test/bt/bt.c b/lsm/test/bt/bt.c index f96cf99..fdff839 100644 --- a/lsm/test/bt/bt.c +++ b/lsm/test/bt/bt.c @@ -54,6 +54,8 @@ void test_insert_multiple() { TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_ok); } + TEST_CHECK(lsm_bt_size(bt) == char_count); + void *data; for (size_t i = 0; i < char_count; i++) { TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_already_present); @@ -68,11 +70,13 @@ void test_remove_root() { BT_INIT(); TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_ok); + TEST_CHECK(lsm_bt_size(bt) == 1); void *data; TEST_CHECK(lsm_bt_remove(&data, bt, 'a') == lsm_error_ok); TEST_CHECK(data == (void *)1); TEST_CHECK(bt->root == NULL); + TEST_CHECK(lsm_bt_size(bt) == 0); lsm_bt_free(bt); } @@ -95,6 +99,8 @@ void test_remove_multiple() { TEST_CHECK(data == (void *)6); TEST_CHECK(lsm_bt_remove(&data, bt, 'e') == lsm_error_not_found); + TEST_CHECK(lsm_bt_size(bt) == char_count - 2); + lsm_bt_free(bt); } diff --git a/lsm/test/trie/fuzzy.h b/lsm/test/trie/fuzzy.h index 6cd0a07..40850e9 100644 --- a/lsm/test/trie/fuzzy.h +++ b/lsm/test/trie/fuzzy.h @@ -122,8 +122,7 @@ int fuzzy_test_trie_seed(FuzzyConfig conf) { lsm_trie *trie; lsm_trie_init(&trie); - bool changed; - lsm_error status; + lsm_error res; // 0: success // 1: invalid add @@ -134,13 +133,13 @@ int fuzzy_test_trie_seed(FuzzyConfig conf) { // Add all strings to trie, checking for duplicates for (int i = 0; i < conf.word_count; i++) { - status = 
lsm_trie_insert(trie, &matrix[i], (void **)1); + res = lsm_trie_insert(trie, &matrix[i], (void **)1); // if changed is false, *contains_dedupped[i] should be true, as changed // can only be false if the string is already contained in the trie. if // changed is true, *contains_dedupped[i] should be false, as the string // cannot be in the trie yet. - if (status == lsm_error_ok && *contains_dedupped[i]) { + if (res == lsm_error_ok && *contains_dedupped[i]) { exit_code = 1; goto END; } @@ -159,26 +158,27 @@ int fuzzy_test_trie_seed(FuzzyConfig conf) { } // Remove all strings again, again taking duplicates into consideration - /* for (int i = 0; i < conf.word_count; i++) { */ - /* changed = remove_func(ct, matrix[i]); */ + for (int i = 0; i < conf.word_count; i++) { + res = lsm_trie_remove(NULL, trie, &matrix[i]); - /* // The string shouldn't be in the trie, yet another add operation */ - /* // says it added it as well */ - /* if (changed != *contains_dedupped[i]) { */ - /* exit_code = 2; */ - /* goto END; */ - /* } */ + // The string shouldn't be in the trie, yet another add operation + // says it added it as well + if (res == lsm_error_ok && !*contains_dedupped[i]) { + exit_code = 2; + goto END; + } - /* if (*contains_dedupped[i]) { */ - /* *contains_dedupped[i] = false; */ - /* size--; */ - /* } */ - /* } */ + if (*contains_dedupped[i]) { + *contains_dedupped[i] = false; + size--; + } + } // Finally, check that the trie is completely empty - /* if (size_func(ct) != 0) { */ - /* exit_code = 4; */ - /* } */ + if (lsm_trie_size(trie) != 0) { + printf("%lu %lu\n", lsm_trie_size(trie), size); + exit_code = 4; + } END: /* trie_free(ct); */ From 115ee12f040d3fbbceef4b81aba639d72315b27e Mon Sep 17 00:00:00 2001 From: Jef Roosens Date: Tue, 17 Oct 2023 11:25:51 +0200 Subject: [PATCH 11/70] feat(lsm): start store entry implementation --- lsm/include/lsm/store.h | 84 +++++++++++++++++++++++++++ lsm/src/_include/lsm/store_internal.h | 22 +++++++ lsm/src/store/lsm_store_entry.c 
| 37 ++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 lsm/include/lsm/store.h create mode 100644 lsm/src/_include/lsm/store_internal.h create mode 100644 lsm/src/store/lsm_store_entry.c diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h new file mode 100644 index 0000000..d0fab21 --- /dev/null +++ b/lsm/include/lsm/store.h @@ -0,0 +1,84 @@ +#ifndef LSM_STORE +#define LSM_STORE + +#include +#include + +#include "lsm.h" +#include "lsm/str.h" + +/** + * The type of an entry attribute. + * + * Each type is represented as a single bit of an + * integer, so they can be easily combined into a bitmap. + */ +typedef enum lsm_attr_type : uint64_t { + lsm_attr_type_entry_type = 1 << 0, + lsm_attr_type_content_type = 1 << 1 +} lsm_attr_type; + +/** + * An entry inside an LSM store. + * + * Each entry consists of the key it's stored behind, zero or more attributes + * (metadata) and a data field. The data field can be stored on disk or + * in-memory, depending on the size. + */ +typedef struct lsm_entry lsm_entry; + +/** + * Allocate and initialize a new lsm_entry object. + * + * @param ptr where to store newly allocated pointer + */ +lsm_error lsm_entry_init(lsm_entry **ptr); + +/** + * Deallocate an existing lsm_entry object. + * + * @param entry object to deallocate + */ +void lsm_entry_free(lsm_entry *entry); + +/** + * Checks whether the entry has an attribute with the specified type. + * + * @param entry entry to check + * @param type type of attribute to check for + */ +bool lsm_entry_attr_present(lsm_entry *entry, lsm_attr_type type); + +/** + * Retrieve the contents of an attribute from an entry, if present + * + * @param out where to store pointer to attribute data + * @param entry entry to search for + * @param type type of attribute to return + */ +lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry *entry, + lsm_attr_type type); + +/** + * A store consisting of LSM entries. 
+ * + * A store manages both an in-memory data structure for quick lookup, and a + * database file for persistent storage of the contained entries. + */ +typedef struct lsm_store lsm_store; + +/** + * Allocate and initialize a new lsm_store object. + * + * @param ptr where to store newly allocated pointer + */ +lsm_error lsm_store_init(lsm_store **ptr); + +/** + * Dealocate an existing lsm_store object. + * + * @param store object to deallocate + */ +void lsm_store_free(lsm_store *store); + +#endif diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h new file mode 100644 index 0000000..d0aaed0 --- /dev/null +++ b/lsm/src/_include/lsm/store_internal.h @@ -0,0 +1,22 @@ +#ifndef LSM_STORE_INTERNAL +#define LSM_STORE_INTERNAL + +#include "lsm/store.h" +#include "lsm/str_internal.h" + +typedef struct lsm_attr { + lsm_attr_type type; + lsm_str str; +} lsm_attr; + +struct lsm_entry { + lsm_str key; + struct { + uint64_t count; + uint64_t bitmap; + lsm_attr *items; + } attrs; + lsm_str data; +}; + +#endif diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c new file mode 100644 index 0000000..99a2b30 --- /dev/null +++ b/lsm/src/store/lsm_store_entry.c @@ -0,0 +1,37 @@ +#include + +#include "lsm.h" +#include "lsm/store_internal.h" + +lsm_error lsm_entry_init(lsm_entry **ptr) { + lsm_entry *entry = calloc(1, sizeof(lsm_entry)); + + if (entry == NULL) { + return lsm_error_failed_alloc; + } + + *ptr = entry; + + return lsm_error_ok; +} + +bool lsm_entry_attr_present(lsm_entry *entry, lsm_attr_type type) { + return (entry->attrs.bitmap & type) != 0; +} + +lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry *entry, + lsm_attr_type type) { + if (!lsm_entry_attr_present(entry, type)) { + return lsm_error_not_found; + } + + for (uint64_t i = 0; i < entry->attrs.count; i++) { + if (entry->attrs.items[i].type == type) { + *out = &entry->attrs.items[i].str; + + return lsm_error_ok; + } + } + + return 
lsm_error_not_found; +} From fca8495de4ed3b75e9003ec3b7b988b1b316bfb8 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Fri, 20 Oct 2023 10:41:53 +0200 Subject: [PATCH 12/70] feat(lsm): implement lsm entry add & remove --- lsm/include/lsm/store.h | 21 ++++++++ lsm/src/_include/lsm/store_internal.h | 7 ++- lsm/src/store/lsm_store.c | 6 +++ lsm/src/store/lsm_store_entry.c | 73 ++++++++++++++++++++++++++- 4 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 lsm/src/store/lsm_store.c diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index d0fab21..e542f9d 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -59,6 +59,27 @@ bool lsm_entry_attr_present(lsm_entry *entry, lsm_attr_type type); lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry *entry, lsm_attr_type type); +/** + * Add a new attribute to the entry. + * + * @param entry entry to modify + * @param type type of attribute to add + * @param data data of attribute; ownership of pointer is taken over + */ +lsm_error lsm_entry_attr_insert(lsm_entry *entry, lsm_attr_type type, + lsm_str *data); + +/** + * Remove an atribute from the given entry, if present. + * + * @param out pointer to store removed data pointer in. If NULL, data pointer + * can get leaked. + * @param entry entry to remove attribute from + * @param type type of attribute to remove + */ +lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry *entry, + lsm_attr_type type); + /** * A store consisting of LSM entries. 
* diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index d0aaed0..b4a833c 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -3,10 +3,11 @@ #include "lsm/store.h" #include "lsm/str_internal.h" +#include "lsm/trie.h" typedef struct lsm_attr { lsm_attr_type type; - lsm_str str; + lsm_str *str; } lsm_attr; struct lsm_entry { @@ -19,4 +20,8 @@ struct lsm_entry { lsm_str data; }; +struct lsm_store { + lsm_trie *trie; +}; + #endif diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c new file mode 100644 index 0000000..8ba5740 --- /dev/null +++ b/lsm/src/store/lsm_store.c @@ -0,0 +1,6 @@ +#include "lsm/store.h" +#include "lsm/store_internal.h" + +/* lsm_error lsm_store_init(lsm_store **ptr) { */ +/* lsm_store *store = */ +/* } */ diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index 99a2b30..aa1c7fc 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -1,4 +1,6 @@ +#include #include +#include #include "lsm.h" #include "lsm/store_internal.h" @@ -27,7 +29,7 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry *entry, for (uint64_t i = 0; i < entry->attrs.count; i++) { if (entry->attrs.items[i].type == type) { - *out = &entry->attrs.items[i].str; + *out = entry->attrs.items[i].str; return lsm_error_ok; } @@ -35,3 +37,72 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry *entry, return lsm_error_not_found; } + +lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry *entry, + lsm_attr_type type) { + if (!lsm_entry_attr_present(entry, type)) { + return lsm_error_not_found; + } + + if (entry->attrs.count == 1) { + *out = entry->attrs.items[0].str; + + free(entry->attrs.items); + entry->attrs.items = NULL; + entry->attrs.count = 0; + entry->attrs.bitmap = 0; + + return lsm_error_ok; + } + + uint64_t i = 0; + + while (entry->attrs.items[i].type != type) { + i++; + } + + lsm_attr *new_attrs = 
malloc((entry->attrs.count - 1) * sizeof(lsm_attr)); + + if (new_attrs == NULL) { + return lsm_error_failed_alloc; + } + + if (out != NULL) { + *out = entry->attrs.items[i].str; + } + + memcpy(new_attrs, entry->attrs.items, i * sizeof(lsm_attr)); + memcpy(&new_attrs[i], &entry->attrs.items[i + 1], + (entry->attrs.count - i - 1) * sizeof(lsm_attr)); + + free(entry->attrs.items); + + entry->attrs.items = new_attrs; + entry->attrs.count--; + entry->attrs.bitmap &= ~type; + + return lsm_error_ok; +} + +lsm_error lsm_entry_attr_insert(lsm_entry *entry, lsm_attr_type type, + lsm_str *data) { + if (lsm_entry_attr_present(entry, type)) { + return lsm_error_already_present; + } + + lsm_attr *new_attrs = + realloc(entry->attrs.items, (entry->attrs.count + 1) * sizeof(lsm_attr)); + + if (new_attrs == NULL) { + return lsm_error_failed_alloc; + } + + new_attrs[entry->attrs.count].type = type; + new_attrs[entry->attrs.count].str = data; + + entry->attrs.items = new_attrs; + entry->attrs.count++; + entry->attrs.bitmap |= type; + + return lsm_error_ok; +} From b552e0a81b0babc8626cd15b0ed8682b1f4e14fe Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Wed, 25 Oct 2023 10:57:45 +0200 Subject: [PATCH 13/70] feat(lsm): some more string functions; start of data streaming api --- Makefile | 7 ++- config.mk | 6 +- lsm/include/lsm.h | 7 ++- lsm/include/lsm/store.h | 43 +++++++++++++- lsm/include/lsm/str.h | 8 +++ lsm/src/_include/lsm/store_internal.h | 15 ++++- lsm/src/store/lsm_store.c | 81 ++++++++++++++++++++++++++- lsm/src/store/lsm_store_entry.c | 12 ++-- lsm/src/str/lsm_str.c | 37 ++++++++++++ 9 files changed, 199 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index ac7af46..1206b6c 100644 --- a/Makefile +++ b/Makefile @@ -39,8 +39,12 @@ objs: $(OBJS) libtrie: $(MAKE) -C trie +.PHONY: liblsm +liblsm: + $(MAKE) -C lsm + .PHONY: bin -$(BIN): libtrie $(OBJS) +$(BIN): libtrie liblsm $(OBJS) $(CC) -o $@ $(OBJS) $(_LDFLAGS) $(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c @@ 
-104,6 +108,7 @@ fmt: clean: rm -rf $(BUILD_DIR) $(MAKE) -C trie clean + $(MAKE) -C lsm clean .PHONY: bear diff --git a/config.mk b/config.mk index 78bdb30..8336cd1 100644 --- a/config.mk +++ b/config.mk @@ -7,9 +7,9 @@ SRC_DIR = src TEST_DIR = test THIRDPARTY_DIR = thirdparty -INC_DIRS = include $(THIRDPARTY_DIR)/include trie/include -LIBS = trie m -LIB_DIRS = ./trie/build +INC_DIRS = include $(THIRDPARTY_DIR)/include trie/include lsm/include +LIBS = trie m lsm +LIB_DIRS = ./trie/build ./lsm/build # -MMD: generate a .d file for every source file. This file can be imported by # make and makes make aware that a header file has been changed, ensuring an diff --git a/lsm/include/lsm.h b/lsm/include/lsm.h index 8ecb958..f5ca373 100644 --- a/lsm/include/lsm.h +++ b/lsm/include/lsm.h @@ -3,7 +3,12 @@ #include -#define LSM_MAX_SKIP_SIZE 8 +#define LSM_RES(x) \ + { \ + lsm_error res = x; \ + if (res != lsm_error_ok) \ + return res; \ + } typedef enum lsm_error { lsm_error_ok = 0, diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index e542f9d..7edcc03 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -7,6 +7,8 @@ #include "lsm.h" #include "lsm/str.h" +#define LSM_STORE_DISK_THRESHOLD 1024 + /** * The type of an entry attribute. * @@ -73,7 +75,7 @@ lsm_error lsm_entry_attr_insert(lsm_entry *entry, lsm_attr_type type, * Remove an atribute from the given entry, if present. * * @param out pointer to store removed data pointer in. If NULL, data pointer - * can get leaked. + * will be leaked. * @param entry entry to remove attribute from * @param type type of attribute to remove */ @@ -95,6 +97,15 @@ typedef struct lsm_store lsm_store; */ lsm_error lsm_store_init(lsm_store **ptr); +/** + * Open the given database file and load it into a new store object. 
+ * + * @param ptr pointer to store newly allocated store + * @param db_path path to the database file + * @param data_path path to the data directory + */ +lsm_error lsm_store_open(lsm_store **ptr, char *db_path, char *data_path); + /** * Dealocate an existing lsm_store object. * @@ -102,4 +113,34 @@ lsm_error lsm_store_init(lsm_store **ptr); */ void lsm_store_free(lsm_store *store); +/** + * Search for an entry in the store. + * + * @param out pointer to store entry pointer in + * @param store store to search in + * @param key key to look with + */ +lsm_error lsm_store_search(lsm_entry **out, lsm_store *store, lsm_str *key); + +/** + * Allocate a new entry in the store with the specified key. + * + * @param out pointer to store new entry pointer in + * @param store store to modify + * @param key key to add; ownership of key pointer is taken over + */ +lsm_error lsm_store_insert(lsm_entry **out, lsm_store *store, lsm_str *key); + +/** + * Append new data to the given entry, which is expected to be in the store. + * + * This function will append either to disk or to memory, depending on the + * length of the entry's data. + * + * @param store store the entry is stored in + * @param entry entry to append data to + * @param data data to append + */ +lsm_error lsm_store_data_append(lsm_store *store, lsm_entry *entry, lsm_str *data); + #endif diff --git a/lsm/include/lsm/str.h b/lsm/include/lsm/str.h index 52659c1..bc18fa9 100644 --- a/lsm/include/lsm/str.h +++ b/lsm/include/lsm/str.h @@ -151,4 +151,12 @@ lsm_error lsm_str_truncate(lsm_str *s, uint64_t new_len); */ lsm_error lsm_str_split(lsm_str *s, lsm_str *s2, uint64_t index); +/** + * Append s2 to s. s2 is left untouched. 
+ * + * @param s string to append s2 to + * @param s2 string to append to s + */ +lsm_error lsm_str_append(lsm_str *s, lsm_str *s2); + #endif diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index b4a833c..e77e879 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -1,6 +1,8 @@ #ifndef LSM_STORE_INTERNAL #define LSM_STORE_INTERNAL +#include + #include "lsm/store.h" #include "lsm/str_internal.h" #include "lsm/trie.h" @@ -11,17 +13,26 @@ typedef struct lsm_attr { } lsm_attr; struct lsm_entry { - lsm_str key; + lsm_str *key; struct { uint64_t count; uint64_t bitmap; lsm_attr *items; } attrs; - lsm_str data; + struct { + uint64_t len; + union { + FILE *file; + char *ptr; + } value; + bool on_disk; + } data; }; struct lsm_store { lsm_trie *trie; + char *data_path; + char *db_path; }; #endif diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 8ba5740..d69f42d 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -1,6 +1,81 @@ +#include +#include + +#include "lsm.h" #include "lsm/store.h" +#include "lsm/trie.h" #include "lsm/store_internal.h" -/* lsm_error lsm_store_init(lsm_store **ptr) { */ -/* lsm_store *store = */ -/* } */ +lsm_error lsm_store_init(lsm_store **ptr) { + lsm_store *store = calloc(1, sizeof(lsm_store)); + + if (store == NULL) { + return lsm_error_failed_alloc; + } + + lsm_error res = lsm_trie_init(&store->trie); + + if (res != lsm_error_ok) { + free(store); + + return res; + } + + *ptr = store; + + return lsm_error_ok; +} + +lsm_error lsm_store_open(lsm_store **ptr, char *db_path, char *data_path) { + lsm_store *store; + LSM_RES(lsm_store_init(&store)); + + // TODO implement all of reading the db file + + store->db_path = db_path; + store->data_path = data_path; + + *ptr = store; + + return lsm_error_ok; +} + +lsm_error lsm_store_search(lsm_entry **out, lsm_store *store, lsm_str *key) { + return lsm_trie_search((void **)out, 
store->trie, key); +} + +lsm_error lsm_store_insert(lsm_entry **out, lsm_store *store, lsm_str *key) { + lsm_entry *entry; + + LSM_RES(lsm_entry_init(&entry)); + LSM_RES(lsm_trie_insert(store->trie, key, entry)); + + entry->key = key; + *out = entry; + + return lsm_error_ok; +} + +lsm_error lsm_store_data_append(lsm_store *store, lsm_entry *entry, lsm_str *data) { + uint64_t new_len = entry->data.len + lsm_str_len(data); + + // Data is in memory and still fits -> keep it in memory + if ((new_len <= LSM_STORE_DISK_THRESHOLD) && (!entry->data.on_disk)) { + char *buf = realloc(entry->data.value.ptr, new_len * sizeof(char)); + + if (buf == NULL) { + return lsm_error_failed_alloc; + } + + memcpy(&buf[entry->data.len], lsm_str_ptr(data), lsm_str_len(data)); + entry->data.value.ptr = buf; + entry->data.len = new_len; + } + // Data will end up on disk + else { + // Data is not yet on disk, so we create the file + if (!entry->data.on_disk) { + + } + } +} diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index aa1c7fc..a64614e 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -27,15 +27,15 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry *entry, return lsm_error_not_found; } - for (uint64_t i = 0; i < entry->attrs.count; i++) { - if (entry->attrs.items[i].type == type) { - *out = entry->attrs.items[i].str; + uint64_t i = 0; - return lsm_error_ok; - } + while (entry->attrs.items[i].type != type) { + i++; } - return lsm_error_not_found; + *out = entry->attrs.items[i].str; + + return lsm_error_ok; } lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry *entry, diff --git a/lsm/src/str/lsm_str.c b/lsm/src/str/lsm_str.c index 2244e52..aea5e02 100644 --- a/lsm/src/str/lsm_str.c +++ b/lsm/src/str/lsm_str.c @@ -196,3 +196,40 @@ bool lsm_str_eq(lsm_str *s1, lsm_str *s2) { return memcmp(lsm_str_ptr(s1), lsm_str_ptr(s2), s1->len) == 0; } + +lsm_error lsm_str_append(lsm_str *s, lsm_str *s2) { + if (s2->len == 0) 
{ + return lsm_error_ok; + } + + uint64_t new_len = s->len + s2->len; + + if (new_len <= 8) { + memcpy(&s->data.val[s->len], s2->data.val, s2->len); + } else { + char *buf; + + if (s->len <= 8) { + buf = malloc(new_len * sizeof(char)); + + if (buf == NULL) { + return lsm_error_failed_alloc; + } + + memcpy(buf, s->data.val, s->len); + } else { + buf = realloc(s->data.ptr, new_len * sizeof(char)); + + if (buf == NULL) { + return lsm_error_failed_alloc; + } + } + + memcpy(&buf[s->len], lsm_str_ptr(s2), s2->len); + s->data.ptr = buf; + } + + s->len += s2->len; + + return lsm_error_ok; +} From aab93d97413fc2235665fe1bb7586e392e1aa124 Mon Sep 17 00:00:00 2001 From: Jef Roosens Date: Wed, 25 Oct 2023 11:19:28 +0200 Subject: [PATCH 14/70] chore(lsm): test cenny gpg key --- lsm/src/store/lsm_store.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index d69f42d..2dc7405 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -78,4 +78,6 @@ lsm_error lsm_store_data_append(lsm_store *store, lsm_entry *entry, lsm_str *dat } } + + return lsm_error_ok; } From 0e4e18da6cce3479c0992a1a28f7d0077196df40 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sat, 28 Oct 2023 15:48:28 +0200 Subject: [PATCH 15/70] feat(lsm): data streaming, random other stuff, locks --- lsm/include/lsm.h | 14 +++-- lsm/include/lsm/store.h | 38 ++++++++++--- lsm/src/_include/lsm/store_internal.h | 12 +++- lsm/src/store/lsm_store.c | 82 ++++++++++++++++++++++++--- lsm/src/store/lsm_store_entry.c | 15 +++++ 5 files changed, 137 insertions(+), 24 deletions(-) diff --git a/lsm/include/lsm.h b/lsm/include/lsm.h index f5ca373..eaecb7e 100644 --- a/lsm/include/lsm.h +++ b/lsm/include/lsm.h @@ -3,11 +3,11 @@ #include -#define LSM_RES(x) \ - { \ - lsm_error res = x; \ - if (res != lsm_error_ok) \ - return res; \ +#define LSM_RES(x) \ + { \ + lsm_error res = x; \ + if (res != lsm_error_ok) \ + return res; \ } typedef enum lsm_error { @@ -15,7 
+15,9 @@ typedef enum lsm_error { lsm_error_failed_alloc = 1, lsm_error_not_found = 2, lsm_error_already_present = 3, - lsm_error_null_value = 4 + lsm_error_null_value = 4, + lsm_error_failed_io = 5, + lsm_error_lock_busy = 6, } lsm_error; /*typedef struct lsm_string { */ diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index 7edcc03..188fd18 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -104,7 +104,7 @@ lsm_error lsm_store_init(lsm_store **ptr); * @param db_path path to the database file * @param data_path path to the data directory */ -lsm_error lsm_store_open(lsm_store **ptr, char *db_path, char *data_path); +lsm_error lsm_store_open(lsm_store **ptr, lsm_str *db_path, lsm_str *data_path); /** * Dealocate an existing lsm_store object. @@ -114,16 +114,37 @@ lsm_error lsm_store_open(lsm_store **ptr, char *db_path, char *data_path); void lsm_store_free(lsm_store *store); /** - * Search for an entry in the store. + * Retrieve an entry from the store, preparing & locking it for the purpose of + * reading. * - * @param out pointer to store entry pointer in - * @param store store to search in - * @param key key to look with + * @param out pointer to store entry pointer + * @param store store to retrieve entry from + * @param key key to search */ -lsm_error lsm_store_search(lsm_entry **out, lsm_store *store, lsm_str *key); +lsm_error lsm_store_get_read(lsm_entry **out, lsm_store *store, lsm_str *key); /** - * Allocate a new entry in the store with the specified key. + * Retrieve an entry from the store for the purposes of writing. This + * write-locks the entry. + * + * @param out pointer to store entry pointer + * @param store store to retrieve entry from + * @param key key to search + */ +lsm_error lsm_store_get_write(lsm_entry **out, lsm_store *store, lsm_str *key); + +/** + * Unlock a locked entry. 
+ * + * @param store store to unlock entry in + * @param entry entry to unlock + */ +lsm_error lsm_store_unlock(lsm_store *store, lsm_entry *entry); + +/** + * Allocate a new entry in the store with the specified key. The entry returned + * will be write-locked, and should be unlocked after streaming the necessary + * data. * * @param out pointer to store new entry pointer in * @param store store to modify @@ -141,6 +162,7 @@ lsm_error lsm_store_insert(lsm_entry **out, lsm_store *store, lsm_str *key); * @param entry entry to append data to * @param data data to append */ -lsm_error lsm_store_data_append(lsm_store *store, lsm_entry *entry, lsm_str *data); +lsm_error lsm_store_data_write(lsm_store *store, lsm_entry *entry, + lsm_str *data); #endif diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index e77e879..d2c8d1f 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -1,6 +1,7 @@ #ifndef LSM_STORE_INTERNAL #define LSM_STORE_INTERNAL +#include #include #include "lsm/store.h" @@ -29,10 +30,17 @@ struct lsm_entry { } data; }; +typedef struct lsm_entry_wrapper { + pthread_rwlock_t lock; + lsm_entry *entry; +} lsm_entry_wrapper; + +lsm_error lsm_entry_wrapper_init(lsm_entry_wrapper **ptr); + struct lsm_store { lsm_trie *trie; - char *data_path; - char *db_path; + lsm_str *data_path; + lsm_str *db_path; }; #endif diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 2dc7405..1fff684 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -1,10 +1,11 @@ +#include #include #include #include "lsm.h" #include "lsm/store.h" -#include "lsm/trie.h" #include "lsm/store_internal.h" +#include "lsm/trie.h" lsm_error lsm_store_init(lsm_store **ptr) { lsm_store *store = calloc(1, sizeof(lsm_store)); @@ -26,7 +27,8 @@ lsm_error lsm_store_init(lsm_store **ptr) { return lsm_error_ok; } -lsm_error lsm_store_open(lsm_store **ptr, char *db_path, char *data_path) { 
+lsm_error lsm_store_open(lsm_store **ptr, lsm_str *db_path, + lsm_str *data_path) { lsm_store *store; LSM_RES(lsm_store_init(&store)); @@ -40,24 +42,67 @@ lsm_error lsm_store_open(lsm_store **ptr, char *db_path, char *data_path) { return lsm_error_ok; } -lsm_error lsm_store_search(lsm_entry **out, lsm_store *store, lsm_str *key) { - return lsm_trie_search((void **)out, store->trie, key); +lsm_error lsm_store_get_read(lsm_entry **out, lsm_store *store, lsm_str *key) { + lsm_entry_wrapper *wrapper; + + LSM_RES(lsm_trie_search((void **)&wrapper, store->trie, key)); + + // We don't want to block the thread + if (pthread_rwlock_tryrdlock(&wrapper->lock) != 0) { + return lsm_error_lock_busy; + } + + lsm_entry *entry = wrapper->entry; + + // While the trie's data field will never be NULL, the actual entry pointer + // might be + if (entry == NULL) { + pthread_rwlock_unlock(&wrapper->lock); + + return lsm_error_not_found; + } + + // Open a new file descriptor if needed + if (entry->data.on_disk && (entry->data.value.file == NULL)) { + char path[store->data_path->len + entry->key->len + 2]; + sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), + lsm_str_ptr(entry->key)); + + FILE *f = fopen(path, "rb"); + + if (f == NULL) { + return lsm_error_failed_io; + } + + entry->data.value.file = f; + } + + return lsm_error_ok; } lsm_error lsm_store_insert(lsm_entry **out, lsm_store *store, lsm_str *key) { - lsm_entry *entry; + lsm_entry_wrapper *wrapper; + LSM_RES(lsm_entry_wrapper_init(&wrapper)); + lsm_entry *entry; LSM_RES(lsm_entry_init(&entry)); - LSM_RES(lsm_trie_insert(store->trie, key, entry)); entry->key = key; + wrapper->entry = entry; + pthread_rwlock_wrlock(&wrapper->lock); + + // TODO mem leak if already present + LSM_RES(lsm_trie_insert(store->trie, key, wrapper)); + *out = entry; return lsm_error_ok; } -lsm_error lsm_store_data_append(lsm_store *store, lsm_entry *entry, lsm_str *data) { +lsm_error lsm_store_data_write(lsm_store *store, lsm_entry *entry, + lsm_str 
*data) { uint64_t new_len = entry->data.len + lsm_str_len(data); + const char *data_s = lsm_str_ptr(data); // Data is in memory and still fits -> keep it in memory if ((new_len <= LSM_STORE_DISK_THRESHOLD) && (!entry->data.on_disk)) { @@ -67,7 +112,7 @@ lsm_error lsm_store_data_append(lsm_store *store, lsm_entry *entry, lsm_str *dat return lsm_error_failed_alloc; } - memcpy(&buf[entry->data.len], lsm_str_ptr(data), lsm_str_len(data)); + memcpy(&buf[entry->data.len], data_s, lsm_str_len(data)); entry->data.value.ptr = buf; entry->data.len = new_len; } @@ -75,7 +120,28 @@ lsm_error lsm_store_data_append(lsm_store *store, lsm_entry *entry, lsm_str *dat else { // Data is not yet on disk, so we create the file if (!entry->data.on_disk) { + char path[store->data_path->len + entry->key->len + 2]; + sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), + lsm_str_ptr(entry->key)); + FILE *f = fopen(path, "w"); + + if (f == NULL) { + return lsm_error_failed_io; + } + + entry->data.value.file = f; + entry->data.on_disk = true; + + // TODO free old buff, write original data to file + } + + size_t written = 0; + + // TODO what happens when I/O fails? 
+ while (written < data->len) { + written += fwrite(&data_s[written], sizeof(char), data->len - written, + entry->data.value.file); } } diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index a64614e..4a83be2 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -1,4 +1,5 @@ #include +#include #include #include @@ -17,6 +18,20 @@ lsm_error lsm_entry_init(lsm_entry **ptr) { return lsm_error_ok; } +lsm_error lsm_entry_wrapper_init(lsm_entry_wrapper **ptr) { + lsm_entry_wrapper *wrap = calloc(1, sizeof(lsm_entry_wrapper)); + + if (wrap == NULL) { + return lsm_error_failed_alloc; + } + + pthread_rwlock_init(&wrap->lock, NULL); + + *ptr = wrap; + + return lsm_error_ok; +} + bool lsm_entry_attr_present(lsm_entry *entry, lsm_attr_type type) { return (entry->attrs.bitmap & type) != 0; } From f44c5120999f6e410be56aff77fbd5c7a14ba9bf Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 29 Oct 2023 12:19:59 +0100 Subject: [PATCH 16/70] feat(lsm): introduce entry handles for concurrent access --- lsm/include/lsm/store.h | 71 +++++++--------- lsm/src/_include/lsm/store_internal.h | 33 +++++++- lsm/src/store/lsm_store.c | 115 ++++++++++++++++++++++---- lsm/src/store/lsm_store_entry.c | 44 ++++++++-- 4 files changed, 195 insertions(+), 68 deletions(-) diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index 188fd18..7e7e59a 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -21,27 +21,10 @@ typedef enum lsm_attr_type : uint64_t { } lsm_attr_type; /** - * An entry inside an LSM store. - * - * Each entry consists of the key it's stored behind, zero or more attributes - * (metadata) and a data field. The data field can be stored on disk or - * in-memory, depending on the size. + * A handle referencing an entry inside a store. Read/write operations from/to + * the entry go through this handle. */ -typedef struct lsm_entry lsm_entry; - -/** - * Allocate and initialize a new lsm_entry object. 
- * - * @param ptr where to store newly allocated pointer - */ -lsm_error lsm_entry_init(lsm_entry **ptr); - -/** - * Deallocate an existing lsm_entry object. - * - * @param entry object to deallocate - */ -void lsm_entry_free(lsm_entry *entry); +typedef struct lsm_entry_handle lsm_entry_handle; /** * Checks whether the entry has an attribute with the specified type. @@ -49,7 +32,7 @@ void lsm_entry_free(lsm_entry *entry); * @param entry entry to check * @param type type of attribute to check for */ -bool lsm_entry_attr_present(lsm_entry *entry, lsm_attr_type type); +bool lsm_entry_attr_present(lsm_entry_handle *handle, lsm_attr_type type); /** * Retrieve the contents of an attribute from an entry, if present @@ -58,7 +41,7 @@ bool lsm_entry_attr_present(lsm_entry *entry, lsm_attr_type type); * @param entry entry to search for * @param type type of attribute to return */ -lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry *entry, +lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, lsm_attr_type type); /** @@ -68,7 +51,7 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry *entry, * @param type type of attribute to add * @param data data of attribute; ownership of pointer is taken over */ -lsm_error lsm_entry_attr_insert(lsm_entry *entry, lsm_attr_type type, +lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, lsm_attr_type type, lsm_str *data); /** @@ -79,7 +62,7 @@ lsm_error lsm_entry_attr_insert(lsm_entry *entry, lsm_attr_type type, * @param entry entry to remove attribute from * @param type type of attribute to remove */ -lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry *entry, +lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, lsm_attr_type type); /** @@ -104,7 +87,7 @@ lsm_error lsm_store_init(lsm_store **ptr); * @param db_path path to the database file * @param data_path path to the data directory */ -lsm_error lsm_store_open(lsm_store **ptr, lsm_str *db_path, lsm_str *data_path); +lsm_error 
lsm_store_load(lsm_store **ptr, lsm_str *db_path, lsm_str *data_path); /** * Dealocate an existing lsm_store object. @@ -114,43 +97,45 @@ lsm_error lsm_store_open(lsm_store **ptr, lsm_str *db_path, lsm_str *data_path); void lsm_store_free(lsm_store *store); /** - * Retrieve an entry from the store, preparing & locking it for the purpose of - * reading. + * Open a read handle to the given entry. This entry must be properly closed + * using `lsm_store_handle_close`. * - * @param out pointer to store entry pointer + * @param out pointer to store handle pointer * @param store store to retrieve entry from * @param key key to search */ -lsm_error lsm_store_get_read(lsm_entry **out, lsm_store *store, lsm_str *key); +lsm_error lsm_store_open_read(lsm_entry_handle **out, lsm_store *store, + lsm_str *key); /** - * Retrieve an entry from the store for the purposes of writing. This - * write-locks the entry. + * Open a write handle to the given entry. This entry must be properly closed + * using `lsm_store_handle_close`. * - * @param out pointer to store entry pointer + * @param out pointer to store handle pointer * @param store store to retrieve entry from * @param key key to search */ -lsm_error lsm_store_get_write(lsm_entry **out, lsm_store *store, lsm_str *key); +lsm_error lsm_store_open_write(lsm_entry_handle **out, lsm_store *store, + lsm_str *key); /** - * Unlock a locked entry. + * Close an open entry handle. * - * @param store store to unlock entry in - * @param entry entry to unlock + * @param store store the handle's entry is stored in + * @param handle handle to close */ -lsm_error lsm_store_unlock(lsm_store *store, lsm_entry *entry); +void lsm_entry_close(lsm_entry_handle *handle); /** - * Allocate a new entry in the store with the specified key. The entry returned - * will be write-locked, and should be unlocked after streaming the necessary - * data. + * Insert a new entry into the store, returning a write handle to the newly + * created entry. 
* * @param out pointer to store new entry pointer in * @param store store to modify * @param key key to add; ownership of key pointer is taken over */ -lsm_error lsm_store_insert(lsm_entry **out, lsm_store *store, lsm_str *key); +lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, + lsm_str *key); /** * Append new data to the given entry, which is expected to be in the store. @@ -162,7 +147,7 @@ lsm_error lsm_store_insert(lsm_entry **out, lsm_store *store, lsm_str *key); * @param entry entry to append data to * @param data data to append */ -lsm_error lsm_store_data_write(lsm_store *store, lsm_entry *entry, - lsm_str *data); +lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, + lsm_str *data); #endif diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index d2c8d1f..27b6e5b 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -13,7 +13,14 @@ typedef struct lsm_attr { lsm_str *str; } lsm_attr; -struct lsm_entry { +/** + * An entry inside an LSM store. + * + * Each entry consists of the key it's stored behind, zero or more attributes + * (metadata) and a data field. The data field can be stored on disk or + * in-memory, depending on the size. + */ +typedef struct lsm_entry { lsm_str *key; struct { uint64_t count; @@ -28,7 +35,21 @@ struct lsm_entry { } value; bool on_disk; } data; -}; +} lsm_entry; + +/** + * Allocate and initialize a new lsm_entry object. + * + * @param ptr where to store newly allocated pointer + */ +lsm_error lsm_entry_init(lsm_entry **ptr); + +/** + * Deallocate an existing lsm_entry object. 
+ * + * @param entry object to deallocate + */ +void lsm_entry_free(lsm_entry *entry); typedef struct lsm_entry_wrapper { pthread_rwlock_t lock; @@ -36,6 +57,14 @@ typedef struct lsm_entry_wrapper { } lsm_entry_wrapper; lsm_error lsm_entry_wrapper_init(lsm_entry_wrapper **ptr); +void lsm_entry_wrapper_free(lsm_entry_wrapper *wrapper); + +struct lsm_entry_handle { + lsm_entry_wrapper *wrapper; + FILE *f; +}; + +lsm_error lsm_entry_handle_init(lsm_entry_handle **out); struct lsm_store { lsm_trie *trie; diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 1fff684..27ebf68 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -27,7 +27,7 @@ lsm_error lsm_store_init(lsm_store **ptr) { return lsm_error_ok; } -lsm_error lsm_store_open(lsm_store **ptr, lsm_str *db_path, +lsm_error lsm_store_load(lsm_store **ptr, lsm_str *db_path, lsm_str *data_path) { lsm_store *store; LSM_RES(lsm_store_init(&store)); @@ -42,12 +42,13 @@ lsm_error lsm_store_open(lsm_store **ptr, lsm_str *db_path, return lsm_error_ok; } -lsm_error lsm_store_get_read(lsm_entry **out, lsm_store *store, lsm_str *key) { +lsm_error lsm_store_open_read(lsm_entry_handle **out, lsm_store *store, + lsm_str *key) { lsm_entry_wrapper *wrapper; LSM_RES(lsm_trie_search((void **)&wrapper, store->trie, key)); - // We don't want to block the thread + // Try to get a read lock on the entry's lock if (pthread_rwlock_tryrdlock(&wrapper->lock) != 0) { return lsm_error_lock_busy; } @@ -62,8 +63,17 @@ lsm_error lsm_store_get_read(lsm_entry **out, lsm_store *store, lsm_str *key) { return lsm_error_not_found; } + lsm_entry_handle *handle; + lsm_error res = lsm_entry_handle_init(&handle); + + if (res != lsm_error_ok) { + pthread_rwlock_unlock(&wrapper->lock); + + return res; + } + // Open a new file descriptor if needed - if (entry->data.on_disk && (entry->data.value.file == NULL)) { + if (entry->data.on_disk) { char path[store->data_path->len + entry->key->len + 2]; sprintf(path, "%s/%s", 
lsm_str_ptr(store->data_path), lsm_str_ptr(entry->key)); @@ -71,36 +81,111 @@ lsm_error lsm_store_get_read(lsm_entry **out, lsm_store *store, lsm_str *key) { FILE *f = fopen(path, "rb"); if (f == NULL) { + free(handle); + return lsm_error_failed_io; } - entry->data.value.file = f; + handle->f = f; } + handle->wrapper = wrapper; + *out = handle; + return lsm_error_ok; } -lsm_error lsm_store_insert(lsm_entry **out, lsm_store *store, lsm_str *key) { +lsm_error lsm_store_open_write(lsm_entry_handle **out, lsm_store *store, + lsm_str *key) { + lsm_entry_wrapper *wrapper; + + LSM_RES(lsm_trie_search((void **)&wrapper, store->trie, key)); + + // Try to get a write lock on the entry's lock + // TODO make this timeout to not block + if (pthread_rwlock_wrlock(&wrapper->lock) != 0) { + return lsm_error_lock_busy; + } + + lsm_entry *entry = wrapper->entry; + + // While the trie's data field will never be NULL, the actual entry pointer + // might be + if (entry == NULL) { + pthread_rwlock_unlock(&wrapper->lock); + + return lsm_error_not_found; + } + + lsm_entry_handle *handle; + lsm_error res = lsm_entry_handle_init(&handle); + + if (res != lsm_error_ok) { + pthread_rwlock_unlock(&wrapper->lock); + + return res; + } + + // Open a new file descriptor if needed + if (entry->data.on_disk) { + char path[store->data_path->len + entry->key->len + 2]; + sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), + lsm_str_ptr(entry->key)); + + FILE *f = fopen(path, "wb"); + + if (f == NULL) { + free(handle); + + return lsm_error_failed_io; + } + + handle->f = f; + } + + handle->wrapper = wrapper; + *out = handle; + + return lsm_error_ok; +} + +lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, + lsm_str *key) { + // TODO what happens when two inserts to the same key happen at the same time? 
lsm_entry_wrapper *wrapper; LSM_RES(lsm_entry_wrapper_init(&wrapper)); + pthread_rwlock_wrlock(&wrapper->lock); + + lsm_error res = lsm_trie_insert(store->trie, key, wrapper); + + // Check if entry isn't already present in advance + if (res != lsm_error_ok) { + lsm_entry_wrapper_free(wrapper); + + return res; + } lsm_entry *entry; LSM_RES(lsm_entry_init(&entry)); entry->key = key; wrapper->entry = entry; - pthread_rwlock_wrlock(&wrapper->lock); - // TODO mem leak if already present - LSM_RES(lsm_trie_insert(store->trie, key, wrapper)); + lsm_entry_handle *handle; + LSM_RES(lsm_entry_handle_init(&handle)); - *out = entry; + // No need to set the handle's file, as the entry doesn't have any data yet + handle->wrapper = wrapper; + + *out = handle; return lsm_error_ok; } -lsm_error lsm_store_data_write(lsm_store *store, lsm_entry *entry, - lsm_str *data) { +lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, + lsm_str *data) { + lsm_entry *entry = handle->wrapper->entry; + uint64_t new_len = entry->data.len + lsm_str_len(data); const char *data_s = lsm_str_ptr(data); @@ -124,13 +209,13 @@ lsm_error lsm_store_data_write(lsm_store *store, lsm_entry *entry, sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), lsm_str_ptr(entry->key)); - FILE *f = fopen(path, "w"); + FILE *f = fopen(path, "ab"); if (f == NULL) { return lsm_error_failed_io; } - entry->data.value.file = f; + handle->f = f; entry->data.on_disk = true; // TODO free old buff, write original data to file @@ -141,7 +226,7 @@ lsm_error lsm_store_data_write(lsm_store *store, lsm_entry *entry, // TODO what happens when I/O fails? 
while (written < data->len) { written += fwrite(&data_s[written], sizeof(char), data->len - written, - entry->data.value.file); + handle->f); } } diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index 4a83be2..ffbc138 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -32,16 +32,40 @@ lsm_error lsm_entry_wrapper_init(lsm_entry_wrapper **ptr) { return lsm_error_ok; } -bool lsm_entry_attr_present(lsm_entry *entry, lsm_attr_type type) { - return (entry->attrs.bitmap & type) != 0; +void lsm_entry_wrapper_free(lsm_entry_wrapper *wrapper) { free(wrapper); } + +lsm_error lsm_entry_handle_init(lsm_entry_handle **out) { + lsm_entry_handle *handle = calloc(1, sizeof(lsm_entry_handle)); + + if (handle == NULL) { + return lsm_error_failed_alloc; + } + + *out = handle; + + return lsm_error_ok; } -lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry *entry, +void lsm_entry_close(lsm_entry_handle *handle) { + if (handle->f != NULL) { + fclose(handle->f); + } + + pthread_rwlock_unlock(&handle->wrapper->lock); + free(handle); +} + +bool lsm_entry_attr_present(lsm_entry_handle *handle, lsm_attr_type type) { + return (handle->wrapper->entry->attrs.bitmap & type) != 0; +} + +lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, lsm_attr_type type) { - if (!lsm_entry_attr_present(entry, type)) { + if (!lsm_entry_attr_present(handle, type)) { return lsm_error_not_found; } + lsm_entry *entry = handle->wrapper->entry; uint64_t i = 0; while (entry->attrs.items[i].type != type) { @@ -53,12 +77,14 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry *entry, return lsm_error_ok; } -lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry *entry, +lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, lsm_attr_type type) { - if (!lsm_entry_attr_present(entry, type)) { + if (!lsm_entry_attr_present(handle, type)) { return lsm_error_not_found; } + lsm_entry *entry = handle->wrapper->entry; + 
if (entry->attrs.count == 1) { *out = entry->attrs.items[0].str; @@ -99,12 +125,14 @@ lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry *entry, return lsm_error_ok; } -lsm_error lsm_entry_attr_insert(lsm_entry *entry, lsm_attr_type type, +lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, lsm_attr_type type, lsm_str *data) { - if (lsm_entry_attr_present(entry, type)) { + if (lsm_entry_attr_present(handle, type)) { return lsm_error_already_present; } + lsm_entry *entry = handle->wrapper->entry; + lsm_attr *new_attrs = realloc(entry->attrs.items, (entry->attrs.count + 1) * sizeof(lsm_attr)); From f19a8814f5796b974d6c9e898820247f4284437c Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 29 Oct 2023 12:33:07 +0100 Subject: [PATCH 17/70] fix(lsm): write in-memory data to disk when switching to file --- lsm/include/lsm/store.h | 5 +++-- lsm/src/store/lsm_store.c | 22 ++++++++++++++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index 7e7e59a..1ddf6cc 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -108,8 +108,9 @@ lsm_error lsm_store_open_read(lsm_entry_handle **out, lsm_store *store, lsm_str *key); /** - * Open a write handle to the given entry. This entry must be properly closed - * using `lsm_store_handle_close`. + * Open a write handle to the given entry. This handle should only be used for + * writing; read operations on this handle are unsupported. This entry must be + * properly closed using `lsm_store_handle_close`. 
* * @param out pointer to store handle pointer * @param store store to retrieve entry from diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 27ebf68..42643b9 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -132,7 +132,7 @@ lsm_error lsm_store_open_write(lsm_entry_handle **out, lsm_store *store, sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), lsm_str_ptr(entry->key)); - FILE *f = fopen(path, "wb"); + FILE *f = fopen(path, "ab"); if (f == NULL) { free(handle); @@ -184,6 +184,10 @@ lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, lsm_str *data) { + if (lsm_str_len(data) == 0) { + return lsm_error_ok; + } + lsm_entry *entry = handle->wrapper->entry; uint64_t new_len = entry->data.len + lsm_str_len(data); @@ -199,7 +203,6 @@ lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, memcpy(&buf[entry->data.len], data_s, lsm_str_len(data)); entry->data.value.ptr = buf; - entry->data.len = new_len; } // Data will end up on disk else { @@ -215,10 +218,19 @@ lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, return lsm_error_failed_io; } + size_t written = 0; + + // Write original in-memory data to file + while (written < entry->data.len) { + written += fwrite(&entry->data.value.ptr[written], sizeof(char), + entry->data.len - written, f); + } + + free(entry->data.value.ptr); + entry->data.value.ptr = NULL; + handle->f = f; entry->data.on_disk = true; - - // TODO free old buff, write original data to file } size_t written = 0; @@ -230,5 +242,7 @@ lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, } } + entry->data.len = new_len; + return lsm_error_ok; } From 8b2117a66c285e426c083c25dc038684ea795325 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 29 Oct 2023 13:41:16 +0100 Subject: [PATCH 18/70] fix(lsm): account for empty entries when appending data --- 
lsm/src/store/lsm_store.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 42643b9..cd6a45b 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -195,7 +195,14 @@ lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, // Data is in memory and still fits -> keep it in memory if ((new_len <= LSM_STORE_DISK_THRESHOLD) && (!entry->data.on_disk)) { - char *buf = realloc(entry->data.value.ptr, new_len * sizeof(char)); + char *buf; + + // Entries with no data do not have an allocated buffer yet + if (entry->data.len == 0) { + buf = malloc(new_len * sizeof(char)); + } else { + buf = realloc(entry->data.value.ptr, new_len * sizeof(char)); + } if (buf == NULL) { return lsm_error_failed_alloc; @@ -218,17 +225,23 @@ lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, return lsm_error_failed_io; } - size_t written = 0; + // If there was data present in memory already, we sync this to disk. + // This check is required because it's possible that more than the + // treshold is written to an empty entry immediately, meaning there's no + // allocated memory buffer present. 
+ if (entry->data.len > 0) { + size_t written = 0; - // Write original in-memory data to file - while (written < entry->data.len) { - written += fwrite(&entry->data.value.ptr[written], sizeof(char), - entry->data.len - written, f); + // Write original in-memory data to file + while (written < entry->data.len) { + written += fwrite(&entry->data.value.ptr[written], sizeof(char), + entry->data.len - written, f); + } + + free(entry->data.value.ptr); + entry->data.value.ptr = NULL; } - free(entry->data.value.ptr); - entry->data.value.ptr = NULL; - handle->f = f; entry->data.on_disk = true; } From 1c421c1e678b06f73892b873011888c49eeea9d2 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 29 Oct 2023 13:47:39 +0100 Subject: [PATCH 19/70] chore(lsm): remove outdated files --- lsm/src/lsm_store.c | 23 --------------- lsm/src/lsm_store.h | 12 -------- lsm/src/lsm_store_node.c | 63 ---------------------------------------- lsm/src/lsm_store_node.h | 43 --------------------------- lsm/src/trie/lsm_trie.c | 1 - 5 files changed, 142 deletions(-) delete mode 100644 lsm/src/lsm_store.c delete mode 100644 lsm/src/lsm_store.h delete mode 100644 lsm/src/lsm_store_node.c delete mode 100644 lsm/src/lsm_store_node.h diff --git a/lsm/src/lsm_store.c b/lsm/src/lsm_store.c deleted file mode 100644 index 1a5a445..0000000 --- a/lsm/src/lsm_store.c +++ /dev/null @@ -1,23 +0,0 @@ -#include - -#include "lsm.h" -#include "lsm_store.h" - -/** - * Initialize a new lsm_store struct. 
- * - * @param lsm_store pointer to where to store the newly allocated object's - * pointer - * @return success of the function - */ -/* lsm_error lsm_store_init(lsm_store **ptr) { */ -/* lsm_store *store = calloc(1, sizeof(lsm_store)); */ - -/* if (store == NULL) { */ -/* return lsm_error_failed_alloc; */ -/* } */ - -/* *ptr = store; */ - -/* return lsm_error_ok; */ -/* } */ diff --git a/lsm/src/lsm_store.h b/lsm/src/lsm_store.h deleted file mode 100644 index c73b2e9..0000000 --- a/lsm/src/lsm_store.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef LSM_STORE_INTERNAL -#define LSM_STORE_INTERNAL - -#include "lsm.h" -#include "lsm_store_node.h" - -struct lsm_store { - lsm_store_node *root; - uint64_t size; -}; - -#endif diff --git a/lsm/src/lsm_store_node.c b/lsm/src/lsm_store_node.c deleted file mode 100644 index b50f15e..0000000 --- a/lsm/src/lsm_store_node.c +++ /dev/null @@ -1,63 +0,0 @@ -#include - -#include "lsm.h" -#include "lsm_store_node.h" - -lsm_error lsm_store_inode_init(lsm_store_inode **ptr, const char c) { - lsm_store_inode *node = calloc(1, sizeof(lsm_store_inode)); - - if (node == NULL) { - return lsm_error_failed_alloc; - } - - node->key = c; - *ptr = node; - - return lsm_error_ok; -} - -lsm_error lsm_store_node_init(lsm_store_node **ptr) { - lsm_store_node *node = calloc(1, sizeof(lsm_store_node)); - - if (node == NULL) { - return lsm_error_failed_alloc; - } - - *ptr = node; - - return lsm_error_ok; -} - -lsm_error lsm_store_node_search(lsm_store_node **out_ptr, lsm_store_node *node, - const char c) { - if (node->size == 0) { - return lsm_error_not_found; - } - - lsm_store_inode *parent = node->root; - lsm_store_inode *child; - lsm_store_node *out = NULL; - - while (1) { - if (parent->key == c) { - out = parent->next; - break; - } - - child = (c < parent->key) ? 
parent->left : parent->right; - - if (child == NULL) { - break; - } - - parent = child; - }; - - if (out == NULL) { - return lsm_error_not_found; - } - - *out_ptr = out; - - return lsm_error_ok; -} diff --git a/lsm/src/lsm_store_node.h b/lsm/src/lsm_store_node.h deleted file mode 100644 index 826b312..0000000 --- a/lsm/src/lsm_store_node.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef LSM_STORE_NODE_INTERNAL -#define LSM_STORE_NODE_INTERNAL - -#include "lsm.h" - -/** - * A node inside a store node's internal binary tree. - */ -typedef struct lsm_store_inode { - struct lsm_store_inode *left; - struct lsm_store_inode *right; - struct lsm_store_node *next; - char key; -} lsm_store_inode; - -/** - * Initialize a new lsm_store_inode. - */ -lsm_error lsm_store_inode_init(lsm_store_inode **ptr, const char c); - -/** - * A node inside the store's trie structure. Internally, each node manages a - * binary tree. - */ -typedef struct lsm_store_node { - /* lsm_entry *entry; */ - lsm_store_inode *root; - uint8_t size; - /* lsm_string skip; */ -} lsm_store_node; - -/** - * Initialize a new lsm_store_node. - */ -lsm_error lsm_store_node_init(lsm_store_node **out); - -/** - * Search for the next node following the given character, if present. 
- */ -lsm_error lsm_store_node_search(lsm_store_node **out, lsm_store_node *node, - const char c); - -#endif diff --git a/lsm/src/trie/lsm_trie.c b/lsm/src/trie/lsm_trie.c index b6e190a..8744b4e 100644 --- a/lsm/src/trie/lsm_trie.c +++ b/lsm/src/trie/lsm_trie.c @@ -1,4 +1,3 @@ -#include #include #include "lsm.h" From 1461956d980cff9e33cb200d33394398111137cf Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 29 Oct 2023 13:58:42 +0100 Subject: [PATCH 20/70] chore(lsm): add example binary support to Makefile --- lsm/Makefile | 24 +++++++++++++++++++++--- lsm/config.mk | 4 +++- lsm/example/test.c | 5 +++++ 3 files changed, 29 insertions(+), 4 deletions(-) create mode 100644 lsm/example/test.c diff --git a/lsm/Makefile b/lsm/Makefile index 5352623..c853ed9 100644 --- a/lsm/Makefile +++ b/lsm/Makefile @@ -9,13 +9,17 @@ SRCS != find '$(SRC_DIR)' -iname '*.c' SRCS_H != find $(INC_DIRS) -iname '*.h' SRCS_H_INTERNAL != find $(SRC_DIR) -iname '*.h' SRCS_TEST != find '$(TEST_DIR)' -iname '*.c' +SRCS_EXAMPLE != find '$(EXAMPLE_DIR)' -iname '*.c' OBJS := $(SRCS:%=$(BUILD_DIR)/%.o) OBJS_TEST := $(SRCS_TEST:%=$(BUILD_DIR)/%.o) -DEPS := $(SRCS:%=$(BUILD_DIR)/%.d) $(SRCS_TEST:%=$(BUILD_DIR)/%.d) +OBJS_EXAMPLE := $(SRCS_EXAMPLE:%=$(BUILD_DIR)/%.o) +DEPS := $(SRCS:%=$(BUILD_DIR)/%.d) $(SRCS_TEST:%=$(BUILD_DIR)/%.d) $(SRCS_EXAMPLE:%=$(BUILD_DIR)/%.d) BINS_TEST := $(OBJS_TEST:%.c.o=%) +BINS_EXAMPLE := $(OBJS_EXAMPLE:%.c.o=%) TARGETS_TEST := $(BINS_TEST:%=test-%) +TARGETS_EXAMPLE := $(BINS_EXAMPLE:%=test-%) TARGETS_MEM_TEST := $(BINS_TEST:%=test-mem-%) _CFLAGS := $(addprefix -I,$(INC_DIRS)) $(CFLAGS) -Wall -Wextra @@ -71,14 +75,27 @@ $(BUILD_DIR)/$(TEST_DIR)/%.c.o: $(TEST_DIR)/%.c -I$(dir $(@:$(BUILD_DIR)/$(TEST_DIR)/%=$(SRC_DIR)/%)) \ -c $< -o $@ +# =====EXAMPLES===== +.PHONY: build-example +build-example: $(BINS_EXAMPLE) + +$(BINS_EXAMPLE): %: %.c.o $(LIB) + $(CC) \ + $^ -o $@ + +# Example binaries link the resulting library +$(BUILD_DIR)/$(EXAMPLE_DIR)/%.c.o: $(EXAMPLE_DIR)/%.c 
+ mkdir -p $(dir $@) + $(CC) $(_CFLAGS) -I$(PUB_INC_DIR) -c $< -o $@ + # =====MAINTENANCE===== .PHONY: lint lint: - clang-format -n --Werror $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) + clang-format -n --Werror $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) $(SRCS_EXAMPLE) .PHONY: fmt fmt: - clang-format -i $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) + clang-format -i $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) $(SRCS_EXAMPLE) .PHONY: clean clean: @@ -89,6 +106,7 @@ clean: bear: clean bear -- make bear --append -- make build-test + bear --append -- make build-example # Make make aware of the .d files diff --git a/lsm/config.mk b/lsm/config.mk index 310b7c4..4a7502c 100644 --- a/lsm/config.mk +++ b/lsm/config.mk @@ -3,7 +3,9 @@ LIB_FILENAME = liblsm.a BUILD_DIR = build SRC_DIR = src TEST_DIR = test -INC_DIRS = include src/_include +EXAMPLE_DIR = example +PUB_INC_DIR = include +INC_DIRS = $(PUB_INC_DIR) src/_include # -MMD: generate a .d file for every source file. This file can be imported by # make and makes make aware that a header file has been changed, ensuring an diff --git a/lsm/example/test.c b/lsm/example/test.c new file mode 100644 index 0000000..13b8738 --- /dev/null +++ b/lsm/example/test.c @@ -0,0 +1,5 @@ +#include + +#include "lsm.h" + +int main() { printf("yuh"); } From fc4187e6ce8327c81404a52d14d3bdb6350199b2 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 29 Oct 2023 14:41:40 +0100 Subject: [PATCH 21/70] feat(lsm): add entry data reading --- lsm/example/test.c | 43 ++++++++++++++++++++++++++- lsm/include/lsm/store.h | 12 ++++++++ lsm/src/_include/lsm/store_internal.h | 1 + lsm/src/store/lsm_store.c | 31 +++++++++++++++++++ 4 files changed, 86 insertions(+), 1 deletion(-) diff --git a/lsm/example/test.c b/lsm/example/test.c index 13b8738..2a7e3d3 100644 --- a/lsm/example/test.c +++ b/lsm/example/test.c @@ -1,5 +1,46 @@ +#include #include #include "lsm.h" +#include "lsm/store.h" +#include "lsm/str.h" -int main() { printf("yuh"); } +int main() { + lsm_str *db_path, 
*data_dir; + lsm_str_init_copy(&db_path, "data/data.db"); + lsm_str_init_copy(&data_dir, "data"); + + lsm_store *store; + lsm_store_load(&store, db_path, data_dir); + + lsm_str *key; + lsm_str_init_copy(&key, "key"); + + lsm_entry_handle *handle; + assert(lsm_store_insert(&handle, store, key) == lsm_error_ok); + + lsm_str *data; + lsm_str_init_copy(&data, "hello"); + + for (int i = 0; i < 50; i++) { + lsm_entry_data_append(store, handle, data); + } + + lsm_entry_close(handle); + + assert(lsm_store_open_read(&handle, store, key) == lsm_error_ok); + + char buf[24]; + uint64_t read; + uint64_t total = 0; + + lsm_entry_data_read(&read, buf, handle, 24); + total += read; + + while (read > 0) { + printf("%.*s", read, buf); + lsm_entry_data_read(&read, buf, handle, 24); + total += read; + } + printf("\n%lu", total); +} diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index 1ddf6cc..d7e2e83 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -151,4 +151,16 @@ lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, lsm_str *data); +/** + * Read a number of bytes from the entry's data field. The position from which + * data is read is dependent on previous read calls. 
+ * + * @param out where to write how many bytes were read + * @param buf buffer to store read data in + * @param handle entry handle to read from + * @param len how many bytes to read at most + */ +lsm_error lsm_entry_data_read(uint64_t *out, char *buf, + lsm_entry_handle *handle, uint64_t len); + #endif diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index 27b6e5b..c8bad4c 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -62,6 +62,7 @@ void lsm_entry_wrapper_free(lsm_entry_wrapper *wrapper); struct lsm_entry_handle { lsm_entry_wrapper *wrapper; FILE *f; + uint64_t pos; }; lsm_error lsm_entry_handle_init(lsm_entry_handle **out); diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index cd6a45b..26e6af7 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -259,3 +259,34 @@ lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, return lsm_error_ok; } + +lsm_error lsm_entry_data_read(uint64_t *out, char *buf, + lsm_entry_handle *handle, uint64_t len) { + lsm_entry *entry = handle->wrapper->entry; + + if (entry->data.len == 0) { + *out = 0; + + return lsm_error_ok; + } + + uint64_t read; + + if (entry->data.on_disk) { + read = fread(buf, sizeof(char), len, handle->f); + + if ((read == 0) && (ferror(handle->f) != 0)) { + return lsm_error_failed_io; + } + } else { + read = (entry->data.len - handle->pos) < len + ? 
(entry->data.len - handle->pos) + : len; + memcpy(buf, &entry->data.value.ptr[handle->pos], read * sizeof(char)); + } + + handle->pos += read; + *out = read; + + return lsm_error_ok; +} From 6d74c8c5503bd197ae99f4cfdfda341a5e300863 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Mon, 30 Oct 2023 21:14:06 +0100 Subject: [PATCH 22/70] feat(http): fully decouple HTTP loop functionality --- CHANGELOG.md | 2 ++ include/http_loop.h | 36 ++++++++++++++++++++++++++++++----- include/lander.h | 17 +++++++++++++++++ src/http_loop/http_loop.c | 16 +++++++++++++++- src/http_loop/http_loop_ctx.c | 4 ++++ src/lander/lander.c | 8 ++++++++ src/lander/lander_get.c | 7 ++++--- src/lander/lander_post.c | 12 ++++++++---- src/main.c | 18 ++++++++++-------- 9 files changed, 99 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0709613..91784d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased](https://git.rustybever.be/Chewing_Bever/lander/src/branch/dev) +* Fully decoupled HTTP loop functionnality + ## [0.1.0](https://git.rustybever.be/Chewing_Bever/lander/src/tag/0.1.0) ### Added diff --git a/include/http_loop.h b/include/http_loop.h index b207d51..486ccbf 100644 --- a/include/http_loop.h +++ b/include/http_loop.h @@ -52,9 +52,12 @@ typedef struct http_route { typedef struct http_loop_gctx { http_route *routes; size_t route_count; - Trie *trie; + void *(*custom_ctx_init)(); + void (*custom_ctx_reset)(void *); + void (*custom_ctx_free)(void *); const char *api_key; - const char *data_dir; + // Custom global context + void *c; } http_loop_gctx; /** @@ -73,6 +76,7 @@ typedef struct http_loop_ctx { http_route *route; size_t current_step; http_loop_gctx *g; + void *c; } http_loop_ctx; /** @@ -98,6 +102,11 @@ void http_loop_ctx_reset(http_loop_ctx *ctx); */ void http_loop_ctx_free(http_loop_ctx *ctx); +/** + * Represents an HTTP loop + */ +typedef struct 
event_loop http_loop; + /** * Process incoming data as an HTTP request. This is the "handle_data" function * for the event loop. @@ -174,10 +183,27 @@ bool http_loop_step_switch_res(event_loop_conn *conn); /** * Initialize a new http loop. * - * @param gctx global context for the event loop + * @param routes array of routes that should be served + * @parma route_count how many elements are in `routes` + * @param custom_gctx the application's custom global context; can be NULL + * @param custom_ctx_init function to initialize a new custom context + * @param custom_ctx_reset function to reset a custom context + * @param custom_ctx_free function to free a custom context; will always be run + * after a reset * @return pointer to the newly allocated object */ -event_loop *http_loop_init(http_loop_gctx *gctx); +http_loop *http_loop_init(http_route *routes, size_t route_count, + void *custom_gctx, void *(*custom_ctx_init)(), + void(custom_ctx_reset)(void *), + void(custom_ctx_free)(void *)); + +/** + * Set the API key the authentication steps should use. + * + * @param hl HTTP loop to set key in + * @param api_key API key to use + */ +void http_loop_set_api_key(http_loop *hl, const char *api_key); /** * Run the HTTP loop. This function never returns. 
@@ -185,6 +211,6 @@ event_loop *http_loop_init(http_loop_gctx *gctx); * @param el the event loop * @param port on what port to listen */ -void http_loop_run(event_loop *el, int port); +void http_loop_run(http_loop *hl, int port); #endif diff --git a/include/lander.h b/include/lander.h index daf6fe7..f44870b 100644 --- a/include/lander.h +++ b/include/lander.h @@ -5,6 +5,23 @@ extern http_route lander_routes[4]; +typedef struct lander_gctx { + const char *data_dir; + Trie *trie; +} lander_gctx; + +typedef struct lander_ctx { + char *key; +} lander_ctx; + +void *lander_gctx_init(); + +void *lander_ctx_init(); + +void lander_ctx_reset(lander_ctx *ctx); + +void lander_ctx_free(lander_ctx *ctx); + bool lander_get_index(event_loop_conn *conn); bool lander_get_entry(event_loop_conn *conn); diff --git a/src/http_loop/http_loop.c b/src/http_loop/http_loop.c index e5c7d20..536f89d 100644 --- a/src/http_loop/http_loop.c +++ b/src/http_loop/http_loop.c @@ -46,18 +46,32 @@ bool http_loop_handle_request(event_loop_conn *conn) { return conn->state == event_loop_conn_state_req; } -event_loop *http_loop_init(http_loop_gctx *gctx) { +event_loop *http_loop_init(http_route *routes, size_t route_count, + void *custom_gctx, void *(*custom_ctx_init)(), + void(custom_ctx_reset)(), void(custom_ctx_free)()) { event_loop *el = event_loop_init(); el->ctx_init = (void *(*)(void *))http_loop_ctx_init; el->ctx_free = (void (*)(void *))http_loop_ctx_free; el->handle_data = http_loop_handle_request; el->write_data = http_loop_write_response; + + http_loop_gctx *gctx = http_loop_gctx_init(); + gctx->c = custom_gctx; + gctx->routes = routes; + gctx->route_count = route_count; + gctx->custom_ctx_init = custom_ctx_init; + gctx->custom_ctx_reset = custom_ctx_reset; + gctx->custom_ctx_free = custom_ctx_free; el->gctx = gctx; return el; } +void http_loop_set_api_key(http_loop *hl, const char *api_key) { + ((http_loop_gctx *)hl->gctx)->api_key = api_key; +} + void http_loop_run(event_loop *el, int port) { 
debug("Compiling RegEx routes"); diff --git a/src/http_loop/http_loop_ctx.c b/src/http_loop/http_loop_ctx.c index b820725..8d0db1e 100644 --- a/src/http_loop/http_loop_ctx.c +++ b/src/http_loop/http_loop_ctx.c @@ -12,12 +12,14 @@ http_loop_gctx *http_loop_gctx_init() { http_loop_ctx *http_loop_ctx_init(http_loop_gctx *g) { http_loop_ctx *ctx = calloc(sizeof(http_loop_ctx), 1); ctx->g = g; + ctx->c = g->custom_ctx_init(); return ctx; } void http_loop_ctx_free(http_loop_ctx *ctx) { http_loop_ctx_reset(ctx); + ctx->g->custom_ctx_free(ctx->c); free(ctx); } @@ -45,4 +47,6 @@ void http_loop_ctx_reset(http_loop_ctx *ctx) { ctx->res.status = 0; ctx->res.head_len = 0; ctx->res.head_written = 0; + + ctx->g->custom_ctx_reset(ctx->c); } diff --git a/src/lander/lander.c b/src/lander/lander.c index 60ee83d..9606326 100644 --- a/src/lander/lander.c +++ b/src/lander/lander.c @@ -22,3 +22,11 @@ http_route lander_routes[] = { .steps = {http_loop_step_auth, lander_post_paste, http_loop_step_body_to_file, http_loop_step_switch_res, NULL}}, }; + +void *lander_gctx_init() { return calloc(1, sizeof(lander_gctx)); } + +void *lander_ctx_init() { return calloc(1, sizeof(lander_ctx)); } + +void lander_ctx_reset(lander_ctx *ctx) {} + +void lander_ctx_free(lander_ctx *ctx) { free(ctx); } diff --git a/src/lander/lander_get.c b/src/lander/lander_get.c index e4ba39b..b139d09 100644 --- a/src/lander/lander_get.c +++ b/src/lander/lander_get.c @@ -24,12 +24,13 @@ bool lander_get_index(event_loop_conn *conn) { bool lander_get_entry(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; + lander_gctx *c_gctx = ctx->g->c; const char *key = &ctx->req.path[ctx->req.regex_groups[1].rm_so]; int key_len = ctx->req.regex_groups[1].rm_eo - ctx->req.regex_groups[1].rm_so; Entry *entry; - TrieExitCode res = trie_search_len(ctx->g->trie, &entry, key, key_len); + TrieExitCode res = trie_search_len(c_gctx->trie, &entry, key, key_len); if (res == NotFound) { ctx->res.status = http_not_found; @@ -37,8 +38,8 @@ 
bool lander_get_entry(event_loop_conn *conn) { ctx->res.status = http_moved_permanently; http_res_add_header(&ctx->res, http_header_location, entry->string, false); } else if (entry->type == Paste) { - char fname[strlen(ctx->g->data_dir) + 8 + key_len + 1]; - sprintf(fname, "%s/pastes/%.*s", ctx->g->data_dir, key_len, key); + char fname[strlen(c_gctx->data_dir) + 8 + key_len + 1]; + sprintf(fname, "%s/pastes/%.*s", c_gctx->data_dir, key_len, key); http_res_set_body_file(&ctx->res, fname); // TODO don't call everything a text file diff --git a/src/lander/lander_post.c b/src/lander/lander_post.c index f1b686c..9288929 100644 --- a/src/lander/lander_post.c +++ b/src/lander/lander_post.c @@ -5,6 +5,8 @@ // TODO entry leaks if key is already present static bool add_entry(char **key_ptr, int *key_len_ptr, http_loop_ctx *ctx, Entry *entry, bool random) { + lander_gctx *c_gctx = ctx->g->c; + // The first match group matches the "long" path bool secure = (ctx->req.regex_groups[1].rm_eo - ctx->req.regex_groups[1].rm_so) == 1; @@ -14,7 +16,7 @@ static bool add_entry(char **key_ptr, int *key_len_ptr, http_loop_ctx *ctx, TrieExitCode res; if (random) { - res = trie_add_random(ctx->g->trie, &key, entry, secure); + res = trie_add_random(c_gctx->trie, &key, entry, secure); if (res == Ok) { key_len = strlen(key); @@ -23,7 +25,7 @@ static bool add_entry(char **key_ptr, int *key_len_ptr, http_loop_ctx *ctx, key = (char *)&ctx->req.path[ctx->req.regex_groups[2].rm_so]; key_len = ctx->req.regex_groups[2].rm_eo - ctx->req.regex_groups[2].rm_so; - res = trie_add_len(ctx->g->trie, key, key_len, entry); + res = trie_add_len(c_gctx->trie, key, key_len, entry); } switch (res) { @@ -89,6 +91,8 @@ bool lander_post_redirect(event_loop_conn *conn) { bool lander_post_paste(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; + lander_gctx *c_gctx = ctx->g->c; + bool random = ctx->req.regex_groups[2].rm_eo == ctx->req.regex_groups[2].rm_so; @@ -102,8 +106,8 @@ bool 
lander_post_paste(event_loop_conn *conn) { return true; } - char *fname = malloc(strlen(ctx->g->data_dir) + 8 + key_len + 1); - sprintf(fname, "%s/pastes/%.*s", ctx->g->data_dir, key_len, key); + char *fname = malloc(strlen(c_gctx->data_dir) + 8 + key_len + 1); + sprintf(fname, "%s/pastes/%.*s", c_gctx->data_dir, key_len, key); ctx->req.body.fname = fname; ctx->req.body.fname_owned = true; diff --git a/src/main.c b/src/main.c index f32f83f..cbefd01 100644 --- a/src/main.c +++ b/src/main.c @@ -45,13 +45,15 @@ int main() { info("Trie initialized and populated with %i entries", trie_size(trie)); - http_loop_gctx *gctx = http_loop_gctx_init(); - gctx->trie = trie; - gctx->routes = lander_routes; - gctx->route_count = sizeof(lander_routes) / sizeof(lander_routes[0]); - gctx->api_key = api_key; - gctx->data_dir = data_dir; - event_loop *el = http_loop_init(gctx); + lander_gctx *c_gctx = lander_gctx_init(); + c_gctx->data_dir = data_dir; + c_gctx->trie = trie; - http_loop_run(el, port); + http_loop *hl = http_loop_init( + lander_routes, sizeof(lander_routes) / sizeof(lander_routes[0]), c_gctx, + lander_ctx_init, (void (*)(void *))lander_ctx_reset, + (void (*)(void *))lander_ctx_free); + http_loop_set_api_key(hl, api_key); + + http_loop_run(hl, port); } From afd18d3a37ee4285e77612b683a3b3ce8367dc3a Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Thu, 2 Nov 2023 10:27:34 +0100 Subject: [PATCH 23/70] feat(http): add custom processing to responses using response steps --- CHANGELOG.md | 6 ++- Makefile | 10 +++- include/http_loop.h | 39 ++++++++++----- src/http_loop/http_loop.c | 2 +- src/http_loop/http_loop_req.c | 14 +++--- src/http_loop/http_loop_res.c | 89 ++++++++++++++++++++++------------- src/lander/lander.c | 34 ++++++++----- 7 files changed, 128 insertions(+), 66 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 91784d4..3872f31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,11 @@ and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased](https://git.rustybever.be/Chewing_Bever/lander/src/branch/dev) -* Fully decoupled HTTP loop functionnality +* HTTP Loop + * Fully decoupled functionality from Lander-specific code + * Users can now define custom global & request-local contexts + * Introduced "response steps", allowing custom code during the response + part of a request ## [0.1.0](https://git.rustybever.be/Chewing_Bever/lander/src/tag/0.1.0) diff --git a/Makefile b/Makefile index 1206b6c..1dc0dce 100644 --- a/Makefile +++ b/Makefile @@ -43,7 +43,7 @@ libtrie: liblsm: $(MAKE) -C lsm -.PHONY: bin +.PHONY: $(BIN) $(BIN): libtrie liblsm $(OBJS) $(CC) -o $@ $(OBJS) $(_LDFLAGS) @@ -58,11 +58,17 @@ $(BUILD_DIR)/$(THIRDPARTY_DIR)/%.c.o: $(THIRDPARTY_DIR)/%.c # =====TESTING===== .PHONY: run -run: bin +run: $(BIN) LANDER_API_KEY=test \ LANDER_DATA_DIR=data \ '$(BUILD_DIR)/$(BIN_FILENAME)' +.PHONY: valgrind +valgrind: $(BIN) + LANDER_API_KEY=test \ + LANDER_DATA_DIR=data \ + valgrind '$(BUILD_DIR)/$(BIN_FILENAME)' + .PHONY: test test: $(TARGETS_TEST) diff --git a/include/http_loop.h b/include/http_loop.h index 486ccbf..c25becf 100644 --- a/include/http_loop.h +++ b/include/http_loop.h @@ -27,9 +27,10 @@ typedef enum http_route_type { * Function describing a step in a route's processing. * * @param conn connection to process - * @return whether the processing can immediately advance to the next step. A - * step should return false if it's e.g. waiting for I/O, and can therefore not - * finish its task in the current cycle of the event loop. + * @return whether processing can proceed to the next step without performing + * I/O first. For a request step, `false` means more data needs to be read + * before the step can finish its processing. For response steps, `false` means + * there's new data in the write buffer that needs to be written. 
*/ typedef bool (*step)(event_loop_conn *conn); @@ -44,6 +45,7 @@ typedef struct http_route { // starting the http loop regex_t *regex; step steps[HTTP_LOOP_MAX_STEPS]; + step steps_res[HTTP_LOOP_MAX_STEPS]; } http_route; /** @@ -116,14 +118,6 @@ typedef struct event_loop http_loop; */ bool http_loop_handle_request(event_loop_conn *conn); -/** - * Write the HTTP response to the file descriptor. This is the "write_data" - * function for the event loop. - * - * @param conn connection to process - */ -void http_loop_write_response(event_loop_conn *conn); - /** * Try to parse the incoming data as an HTTP request. * @@ -148,6 +142,14 @@ void http_loop_route_request(event_loop_conn *conn); */ void http_loop_process_request(event_loop_conn *conn); +/** + * Handles the response processing. This is the `write_data` function for the + * event loop. + * + * @param conn connection to process + */ +void http_loop_handle_response(event_loop_conn *conn); + /** * Request step that consumes the request body and stores it in a buffer. * @@ -180,6 +182,21 @@ bool http_loop_step_auth(event_loop_conn *conn); */ bool http_loop_step_switch_res(event_loop_conn *conn); +/** + * Write the HTTP header back to the connection. If `res->head` is not set, a + * header will be generated for you. + * + * @param conn connection to process + */ +bool http_loop_step_write_header(event_loop_conn *conn); + +/** + * Write the HTTP body back to the connection. + * + * @param conn connection to process + */ +bool http_loop_step_write_body(event_loop_conn *conn); + /** * Initialize a new http loop. 
* diff --git a/src/http_loop/http_loop.c b/src/http_loop/http_loop.c index 536f89d..dc76061 100644 --- a/src/http_loop/http_loop.c +++ b/src/http_loop/http_loop.c @@ -54,7 +54,7 @@ event_loop *http_loop_init(http_route *routes, size_t route_count, el->ctx_init = (void *(*)(void *))http_loop_ctx_init; el->ctx_free = (void (*)(void *))http_loop_ctx_free; el->handle_data = http_loop_handle_request; - el->write_data = http_loop_write_response; + el->write_data = http_loop_handle_response; http_loop_gctx *gctx = http_loop_gctx_init(); gctx->c = custom_gctx; diff --git a/src/http_loop/http_loop_req.c b/src/http_loop/http_loop_req.c index 09b0ed4..76e15d5 100644 --- a/src/http_loop/http_loop_req.c +++ b/src/http_loop/http_loop_req.c @@ -138,13 +138,11 @@ void http_loop_process_request(event_loop_conn *conn) { ctx->current_step++; } - if (conn->state != event_loop_conn_state_req) { - return; - } - - // If we've reached the end of the list of step functions, we report the - // request as finished by clearing its route - if (ctx->route->steps[ctx->current_step] == NULL) { - http_loop_ctx_reset(ctx); + // Request processing can stop early by switching the connection state + // Either way, we reset the step counter as it will be used by the response + // steps + if ((conn->state != event_loop_conn_state_req) || + (ctx->route->steps[ctx->current_step] == NULL)) { + ctx->current_step = 0; } } diff --git a/src/http_loop/http_loop_res.c b/src/http_loop/http_loop_res.c index d2ae029..7cbf9d3 100644 --- a/src/http_loop/http_loop_res.c +++ b/src/http_loop/http_loop_res.c @@ -48,7 +48,7 @@ void http_loop_init_header(http_response *res) { res->head_len = buf_size + 1; } -void http_loop_write_response(event_loop_conn *conn) { +bool http_loop_step_write_header(event_loop_conn *conn) { http_response *res = &((http_loop_ctx *)conn->ctx)->res; // Create head response @@ -56,43 +56,68 @@ void http_loop_write_response(event_loop_conn *conn) { http_loop_init_header(res); } - // The final 
iteration marks the end of the response, after which we reset the - // context so a next request can be processed - if (res->head_written == res->head_len && - res->body.expected_len == res->body.len) { - http_loop_ctx_reset(conn->ctx); - conn->state = event_loop_conn_state_req; - return; + // Step has finished its work + if (res->head_written == res->head_len) { + return true; } - if (res->head_written < res->head_len) { - size_t bytes_to_write = MIN(res->head_len - res->head_written, - EVENT_LOOP_BUFFER_SIZE - conn->wbuf_size); - memcpy(&conn->wbuf[conn->wbuf_size], &res->head[res->head_written], + size_t bytes_to_write = MIN(res->head_len - res->head_written, + EVENT_LOOP_BUFFER_SIZE - conn->wbuf_size); + memcpy(&conn->wbuf[conn->wbuf_size], &res->head[res->head_written], + bytes_to_write); + + conn->wbuf_size += bytes_to_write; + res->head_written += bytes_to_write; + + return false; +} + +bool http_loop_step_write_body(event_loop_conn *conn) { + http_response *res = &((http_loop_ctx *)conn->ctx)->res; + + if (res->body.expected_len == res->body.len) { + return true; + } + + size_t bytes_to_write = MIN(res->body.expected_len - res->body.len, + EVENT_LOOP_BUFFER_SIZE - conn->wbuf_size); + + size_t bytes_written; + + switch (res->body.type) { + case http_body_buf: + memcpy(&conn->wbuf[conn->wbuf_size], &(res->body.buf)[res->body.len], bytes_to_write); - conn->wbuf_size += bytes_to_write; - res->head_written += bytes_to_write; + res->body.len += bytes_to_write; + break; + case http_body_file: + bytes_written = fread(&conn->wbuf[conn->wbuf_size], sizeof(uint8_t), + bytes_to_write, res->body.file); + conn->wbuf_size += bytes_written; + res->body.len += bytes_written; + break; } - if (res->body.len < res->body.expected_len) { - size_t bytes_to_write = MIN(res->body.expected_len - res->body.len, - EVENT_LOOP_BUFFER_SIZE - conn->wbuf_size); - size_t bytes_written; + return false; +} - switch (res->body.type) { - case http_body_buf: - 
memcpy(&conn->wbuf[conn->wbuf_size], &(res->body.buf)[res->body.len], - bytes_to_write); - conn->wbuf_size += bytes_to_write; - res->body.len += bytes_to_write; - break; - case http_body_file: - bytes_written = fread(&conn->wbuf[conn->wbuf_size], sizeof(uint8_t), - bytes_to_write, res->body.file); - conn->wbuf_size += bytes_written; - res->body.len += bytes_written; - break; - } +void http_loop_handle_response(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + + while ((conn->state == event_loop_conn_state_res) && + (ctx->route->steps_res[ctx->current_step] != NULL) && + ctx->route->steps_res[ctx->current_step](conn)) { + ctx->current_step++; + } + + // Response processing can stop early be switching the connection state + // After response processing has finished its work, we reset the context to + // prepare for a new request + if ((conn->state != event_loop_conn_state_res) || + (ctx->route->steps_res[ctx->current_step] == NULL)) { + http_loop_ctx_reset(ctx); + + conn->state = event_loop_conn_state_req; } } diff --git a/src/lander/lander.c b/src/lander/lander.c index 9606326..5a86f76 100644 --- a/src/lander/lander.c +++ b/src/lander/lander.c @@ -6,21 +6,33 @@ http_route lander_routes[] = { {.type = http_route_literal, .method = http_get, .path = "/", - .steps = {lander_get_index, NULL}}, - {.type = http_route_regex, - .method = http_get, - .path = "^/([^/]+)$", - .steps = {lander_get_entry, NULL}}, - {.type = http_route_regex, - .method = http_post, - .path = "^/s(l?)/([^/]*)$", - .steps = {http_loop_step_auth, http_loop_step_body_to_buf, - lander_post_redirect, NULL}}, + .steps = {lander_get_index, NULL}, + .steps_res = {http_loop_step_write_header, http_loop_step_write_body, + NULL}}, + { + .type = http_route_regex, + .method = http_get, + .path = "^/([^/]+)$", + .steps = {lander_get_entry, NULL}, + .steps_res = {http_loop_step_write_header, http_loop_step_write_body, + NULL}, + }, + { + .type = http_route_regex, + .method = http_post, + .path = 
"^/s(l?)/([^/]*)$", + .steps = {http_loop_step_auth, http_loop_step_body_to_buf, + lander_post_redirect, NULL}, + .steps_res = {http_loop_step_write_header, http_loop_step_write_body, + NULL}, + }, {.type = http_route_regex, .method = http_post, .path = "^/p(l?)/([^/]*)$", .steps = {http_loop_step_auth, lander_post_paste, - http_loop_step_body_to_file, http_loop_step_switch_res, NULL}}, + http_loop_step_body_to_file, http_loop_step_switch_res, NULL}, + .steps_res = {http_loop_step_write_header, http_loop_step_write_body, + NULL}}, }; void *lander_gctx_init() { return calloc(1, sizeof(lander_gctx)); } From 7a21bed2b2ede1b44bd7110796f8884ad333f8eb Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Thu, 2 Nov 2023 13:08:36 +0100 Subject: [PATCH 24/70] feat(lsm): add str copy_n functions --- lsm/include/lsm/str.h | 23 ++++++++++++++++++++++- lsm/src/str/lsm_str.c | 20 +++++++++++++++++++- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/lsm/include/lsm/str.h b/lsm/include/lsm/str.h index bc18fa9..01f2651 100644 --- a/lsm/include/lsm/str.h +++ b/lsm/include/lsm/str.h @@ -36,6 +36,17 @@ lsm_error lsm_str_init_zero(lsm_str **ptr); */ lsm_error lsm_str_init_copy(lsm_str **ptr, char *s); +/** + * Same as `lsm_str_init_copy`, except that it takes an additional argument + * specifying the length of the string to copy over. This can be used to more + * easily "cut" parts of a C-style string out into an LSM string. + * + * @param ptr pointer to store newly allocated pointer + * @param s string to copy into lsm string + * @param len length of string to copy + */ +lsm_error lsm_str_init_copy_n(lsm_str **ptr, char *s, uint64_t len); + /** * Overwrite an existing lsm_str so it now represents the new provided string. * The string pointer of the original object is free'd if needed. Ownership of @@ -52,10 +63,20 @@ void lsm_str_overwrite(lsm_str *str, char *s); * string is copied, leaving the original untouched. 
* * @param str lsm_str object to modify - * @param s string to convert into lsm string; ownership is taken over + * @param s string to copy into lsm string */ lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s); +/** + * Same as `lsm_str_overwrite_copy`, except the length is explicitely specified, + * allowing you to easily "cut" parts of a C string out into an LSM string. + * + * @param str lsm_str object to modify + * @param s string to copy into lsm string + * @param len length of the string to copy + */ +lsm_error lsm_str_overwrite_copy_n(lsm_str *str, char *s, uint64_t len); + /** * Deallocate the existing internal string if needed and replace the lsm_str * with a string of length 0, wiping its contents. This function can be used as diff --git a/lsm/src/str/lsm_str.c b/lsm/src/str/lsm_str.c index aea5e02..afab724 100644 --- a/lsm/src/str/lsm_str.c +++ b/lsm/src/str/lsm_str.c @@ -47,6 +47,20 @@ lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) { return lsm_error_ok; } +lsm_error lsm_str_init_copy_n(lsm_str **ptr, char *s, uint64_t len) { + lsm_str *str = calloc(1, sizeof(lsm_str)); + + if (str == NULL) { + return lsm_error_failed_alloc; + } + + lsm_str_overwrite_copy_n(str, s, len); + + *ptr = str; + + return lsm_error_ok; +} + void lsm_str_overwrite(lsm_str *str, char *s) { str->len = strlen(s); @@ -59,8 +73,10 @@ void lsm_str_overwrite(lsm_str *str, char *s) { } lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s) { - str->len = strlen(s); + return lsm_str_overwrite_copy_n(str, s, strlen(s)); +} +lsm_error lsm_str_overwrite_copy_n(lsm_str *str, char *s, uint64_t len) { if (str->len <= 8) { memcpy(str->data.val, s, str->len); } else { @@ -74,6 +90,8 @@ lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s) { str->data.ptr = buf; } + str->len = len; + return lsm_error_ok; } From fbf6557c0534deb5185ea208299177f4ee55b5f2 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Fri, 3 Nov 2023 13:22:25 +0100 Subject: [PATCH 25/70] feat(http): add step for 
parsing content-length header --- include/http_loop.h | 9 ++++++++- src/http_loop/http_loop_req.c | 1 + src/http_loop/http_loop_steps.c | 35 ++++++++++++++++++++++++++++++++- 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/include/http_loop.h b/include/http_loop.h index c25becf..4737301 100644 --- a/include/http_loop.h +++ b/include/http_loop.h @@ -166,6 +166,13 @@ bool http_loop_step_body_to_buf(event_loop_conn *conn); */ bool http_loop_step_body_to_file(event_loop_conn *conn); +/** + * Try to parse the Content-Length header. + * + * @param conn connection to process + */ +bool http_loop_step_parse_content_length(event_loop_conn *conn); + /** * Authenticate the request using the X-Api-Key header. * @@ -201,7 +208,7 @@ bool http_loop_step_write_body(event_loop_conn *conn); * Initialize a new http loop. * * @param routes array of routes that should be served - * @parma route_count how many elements are in `routes` + * @param route_count how many elements are in `routes` * @param custom_gctx the application's custom global context; can be NULL * @param custom_ctx_init function to initialize a new custom context * @param custom_ctx_reset function to reset a custom context diff --git a/src/http_loop/http_loop_req.c b/src/http_loop/http_loop_req.c index 76e15d5..a8cd841 100644 --- a/src/http_loop/http_loop_req.c +++ b/src/http_loop/http_loop_req.c @@ -144,5 +144,6 @@ void http_loop_process_request(event_loop_conn *conn) { if ((conn->state != event_loop_conn_state_req) || (ctx->route->steps[ctx->current_step] == NULL)) { ctx->current_step = 0; + conn->state = event_loop_conn_state_res; } } diff --git a/src/http_loop/http_loop_steps.c b/src/http_loop/http_loop_steps.c index 474cb72..dfa8c96 100644 --- a/src/http_loop/http_loop_steps.c +++ b/src/http_loop/http_loop_steps.c @@ -1,5 +1,6 @@ #include +#include "http_loop.h" #include "lander.h" /* @@ -22,12 +23,44 @@ static bool string_to_num(size_t *res, const char *s, size_t len) { return true; } +bool 
http_loop_step_parse_content_length(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + + for (size_t i = 0; i < ctx->req.num_headers; i++) { + struct phr_header *header = &ctx->req.headers[i]; + + if (strncmp(header->name, "Content-Length", header->name_len) == 0) { + // If the content length header is present but contains an invalid + // number, we return a bad request error + if (!string_to_num(&ctx->req.body.expected_len, header->value, + header->value_len)) { + ctx->res.status = http_bad_request; + conn->state = event_loop_conn_state_res; + + return true; + } + // The content length was actually 0, so we can instantly return here + else if (ctx->req.body.expected_len == 0) { + return true; + } + } + } + + // A zero here means there's no content length header + if (ctx->req.body.expected_len == 0) { + ctx->res.status = http_length_required; + conn->state = event_loop_conn_state_res; + } + + return true; +} + /* * Try to find and parse the Content-Length header. This function returns true * if it was successful. If false is returned, the underlying step should * immediately exit. 
*/ -static bool try_parse_content_length(event_loop_conn *conn) { +bool try_parse_content_length(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; for (size_t i = 0; i < ctx->req.num_headers; i++) { From 8b6d1f6e917593d218d89177339cb98820b9b066 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Fri, 3 Nov 2023 13:23:46 +0100 Subject: [PATCH 26/70] feat(lsm): add some attr & data support functions; fix str bug --- lsm/include/lsm/store.h | 38 +++++++++++++++++++++++++++++++++ lsm/src/store/lsm_store.c | 28 ++++++++++++++++++------ lsm/src/store/lsm_store_entry.c | 28 ++++++++++++++++++++++++ lsm/src/str/lsm_str.c | 8 +++---- 4 files changed, 91 insertions(+), 11 deletions(-) diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index d7e2e83..1557dd1 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -44,6 +44,16 @@ bool lsm_entry_attr_present(lsm_entry_handle *handle, lsm_attr_type type); lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, lsm_attr_type type); +/** + * Convenience wrapper around `lsm_entry_attr_get` that can be used if we know + * beforehand the attribute value is a 64-bit number. + * + * @param out where to store attribute data + * @param entry entry to search for + * @param type type of attribute to return + */ +lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, lsm_attr_type type); + /** * Add a new attribute to the entry. * @@ -54,6 +64,16 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, lsm_attr_type type, lsm_str *data); +/** + * Convenience wrapper around `lsm_entry_attr_insert` that can be used if the + * data to be stored is a 64-bit number. 
+ * + * @param entry entry to modify + * @param type type of attribute to add + * @param data data of attribute + */ +lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, lsm_attr_type type, uint64_t data); + /** * Remove an atribute from the given entry, if present. * @@ -151,6 +171,16 @@ lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, lsm_str *data); +/** + * Same as `lsm_entry_data_append`, except that it takes a direct char array. + * + * @param store store the entry is stored in + * @param entry entry to append data to + * @param data data to append + * @param len length of data array + */ +lsm_error lsm_entry_data_append_raw(lsm_store *store, lsm_entry_handle *handle, char *data, uint64_t len); + /** * Read a number of bytes from the entry's data field. The position from which * data is read is dependent on previous read calls. @@ -163,4 +193,12 @@ lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, lsm_error lsm_entry_data_read(uint64_t *out, char *buf, lsm_entry_handle *handle, uint64_t len); +/** + * Return the length of the entry's data. + * + * @param handle entry handle to return length for + * @return length of the data + */ +uint64_t lsm_entry_data_len(lsm_entry_handle *handle); + #endif diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 26e6af7..d056621 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -153,18 +153,32 @@ lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, lsm_str *key) { // TODO what happens when two inserts to the same key happen at the same time? 
lsm_entry_wrapper *wrapper; - LSM_RES(lsm_entry_wrapper_init(&wrapper)); - pthread_rwlock_wrlock(&wrapper->lock); - lsm_error res = lsm_trie_insert(store->trie, key, wrapper); + // If a key was previously removed from the trie, the wrapper will already be + // present in the trie + if (lsm_trie_search((void **)&wrapper, store->trie, key) == lsm_error_not_found) { + LSM_RES(lsm_entry_wrapper_init(&wrapper)); + pthread_rwlock_wrlock(&wrapper->lock); - // Check if entry isn't already present in advance - if (res != lsm_error_ok) { - lsm_entry_wrapper_free(wrapper); + lsm_error res = lsm_trie_insert(store->trie, key, wrapper); - return res; + // Check if entry isn't already present in advance + if (res != lsm_error_ok) { + lsm_entry_wrapper_free(wrapper); + + return res; + } + } else { + pthread_rwlock_wrlock(&wrapper->lock); + + if (wrapper->entry != NULL) { + pthread_rwlock_unlock(&wrapper->lock); + + return lsm_error_already_present; + } } + lsm_entry *entry; LSM_RES(lsm_entry_init(&entry)); diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index ffbc138..d7bbc40 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -5,6 +5,7 @@ #include "lsm.h" #include "lsm/store_internal.h" +#include "lsm/str.h" lsm_error lsm_entry_init(lsm_entry **ptr) { lsm_entry *entry = calloc(1, sizeof(lsm_entry)); @@ -77,6 +78,22 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, return lsm_error_ok; } +lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, lsm_attr_type type) { + lsm_str *s; + + LSM_RES(lsm_entry_attr_get(&s, handle, type)); + + uint64_t num; + + for (uint8_t i = 0; i < sizeof(uint64_t) / sizeof(char); i++) { + ((char *)&num)[i] = lsm_str_char(s, i); + } + + *out = num; + + return lsm_error_ok; +} + lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, lsm_attr_type type) { if (!lsm_entry_attr_present(handle, type)) { @@ -149,3 +166,14 @@ lsm_error 
lsm_entry_attr_insert(lsm_entry_handle *handle, lsm_attr_type type, return lsm_error_ok; } + +lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, lsm_attr_type type, uint64_t data) { + lsm_str *s; + LSM_RES(lsm_str_init_copy_n(&s, (char *)&data, sizeof(uint64_t) / sizeof(char))); + + return lsm_entry_attr_insert(handle, type, s); +} + +uint64_t lsm_entry_data_len(lsm_entry_handle *handle) { + return handle->wrapper->entry->data.len; +} diff --git a/lsm/src/str/lsm_str.c b/lsm/src/str/lsm_str.c index afab724..0cfd571 100644 --- a/lsm/src/str/lsm_str.c +++ b/lsm/src/str/lsm_str.c @@ -77,16 +77,16 @@ lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s) { } lsm_error lsm_str_overwrite_copy_n(lsm_str *str, char *s, uint64_t len) { - if (str->len <= 8) { - memcpy(str->data.val, s, str->len); + if (len <= 8) { + memcpy(str->data.val, s, len); } else { - char *buf = malloc(str->len * sizeof(char)); + char *buf = malloc(len * sizeof(char)); if (buf == NULL) { return lsm_error_failed_alloc; } - memcpy(buf, s, str->len); + memcpy(buf, s, len); str->data.ptr = buf; } From 711eaa2bde0a8eabdd297e3b9ed31f1a65792ee3 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Fri, 3 Nov 2023 14:10:14 +0100 Subject: [PATCH 27/70] feat(lander): initial integration of lsm --- include/lander.h | 21 +++++++- src/lander/lander.c | 24 ++++++--- src/lander/lander_get.c | 82 ++++++++++++++++++++++++++++ src/lander/lander_post.c | 114 +++++++++++++++++++++++++++++++++++++++ src/main.c | 5 ++ 5 files changed, 238 insertions(+), 8 deletions(-) diff --git a/include/lander.h b/include/lander.h index f44870b..0579a65 100644 --- a/include/lander.h +++ b/include/lander.h @@ -2,18 +2,27 @@ #define LANDER #include "http_loop.h" +#include "lsm/store.h" extern http_route lander_routes[4]; typedef struct lander_gctx { const char *data_dir; Trie *trie; + lsm_store *store; + } lander_gctx; typedef struct lander_ctx { - char *key; + lsm_entry_handle *entry; + uint64_t remaining_data; } lander_ctx; 
+typedef enum lander_entry_type { + lander_entry_type_redirect = 0, + lander_entry_type_paste = 1, +} lander_entry_type; + void *lander_gctx_init(); void *lander_ctx_init(); @@ -30,4 +39,14 @@ bool lander_post_redirect(event_loop_conn *conn); bool lander_post_paste(event_loop_conn *conn); +bool lander_post_paste_lsm(event_loop_conn *conn); + +bool lander_post_redirect_lsm(event_loop_conn *conn); + +bool lander_stream_body_to_entry(event_loop_conn *conn); + +bool lander_stream_body_to_client(event_loop_conn *conn); + +bool lander_get_entry_lsm(event_loop_conn *conn); + #endif diff --git a/src/lander/lander.c b/src/lander/lander.c index 5a86f76..c4c4ca7 100644 --- a/src/lander/lander.c +++ b/src/lander/lander.c @@ -1,6 +1,8 @@ #include +#include "http_loop.h" #include "lander.h" +#include "lsm/store.h" http_route lander_routes[] = { {.type = http_route_literal, @@ -13,24 +15,24 @@ http_route lander_routes[] = { .type = http_route_regex, .method = http_get, .path = "^/([^/]+)$", - .steps = {lander_get_entry, NULL}, - .steps_res = {http_loop_step_write_header, http_loop_step_write_body, + .steps = {lander_get_entry_lsm, NULL}, + .steps_res = {http_loop_step_write_header, lander_stream_body_to_client, NULL}, }, { .type = http_route_regex, .method = http_post, .path = "^/s(l?)/([^/]*)$", - .steps = {http_loop_step_auth, http_loop_step_body_to_buf, - lander_post_redirect, NULL}, + .steps = {http_loop_step_auth, http_loop_step_parse_content_length, + lander_post_redirect_lsm, lander_stream_body_to_entry, NULL}, .steps_res = {http_loop_step_write_header, http_loop_step_write_body, NULL}, }, {.type = http_route_regex, .method = http_post, .path = "^/p(l?)/([^/]*)$", - .steps = {http_loop_step_auth, lander_post_paste, - http_loop_step_body_to_file, http_loop_step_switch_res, NULL}, + .steps = {http_loop_step_auth, http_loop_step_parse_content_length, + lander_post_paste_lsm, lander_stream_body_to_entry, NULL}, .steps_res = {http_loop_step_write_header, 
http_loop_step_write_body, NULL}}, }; @@ -39,6 +41,14 @@ void *lander_gctx_init() { return calloc(1, sizeof(lander_gctx)); } void *lander_ctx_init() { return calloc(1, sizeof(lander_ctx)); } -void lander_ctx_reset(lander_ctx *ctx) {} +void lander_ctx_reset(lander_ctx *ctx) { + if (ctx->entry != NULL) { + lsm_entry_close(ctx->entry); + + ctx->entry = NULL; + } + + ctx->remaining_data = 0; +} void lander_ctx_free(lander_ctx *ctx) { free(ctx); } diff --git a/src/lander/lander_get.c b/src/lander/lander_get.c index b139d09..d2fee1e 100644 --- a/src/lander/lander_get.c +++ b/src/lander/lander_get.c @@ -1,6 +1,9 @@ #include +#include "event_loop.h" +#include "http/types.h" #include "lander.h" +#include "lsm/store.h" static const char index_page[] = "\n" @@ -50,3 +53,82 @@ bool lander_get_entry(event_loop_conn *conn) { return true; } + +bool lander_get_entry_lsm(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + http_loop_gctx *gctx = ctx->g; + lander_gctx *c_gctx = gctx->c; + + const char *key_s = &ctx->req.path[ctx->req.regex_groups[1].rm_so]; + int key_len = ctx->req.regex_groups[1].rm_eo - ctx->req.regex_groups[1].rm_so; + + lsm_str *key; + lsm_str_init_copy_n(&key, (char *)key_s, key_len); + + switch (lsm_store_open_read(&c_ctx->entry, c_gctx->store, key)) { + case lsm_error_ok: + break; + case lsm_error_not_found: + ctx->res.status = http_not_found; + return true; + default: + ctx->res.status = http_internal_server_error; + return true; + } + + lander_entry_type t; + lsm_entry_attr_get_num((uint64_t *)&t, c_ctx->entry, + lsm_attr_type_entry_type); + + if (t == lander_entry_type_redirect) { + // Stream entire redirect data into buffer to set as header + uint64_t data_len = lsm_entry_data_len(c_ctx->entry); + char *buf = malloc(data_len + 1); + uint64_t read = 0; + uint64_t total_read = 0; + + while (total_read < data_len) { + lsm_entry_data_read(&read, &buf[total_read], c_ctx->entry, + data_len - total_read); + total_read += 
read; + } + + buf[data_len] = '\0'; + + ctx->res.status = http_moved_permanently; + http_res_add_header(&ctx->res, http_header_location, buf, true); + + // We no longer need the entry at this point, so we can unlock it early + // This will also signal to the response code not to read any data from + // the entry + lsm_entry_close(c_ctx->entry); + c_ctx->entry = NULL; + } else { + ctx->res.body.expected_len = lsm_entry_data_len(c_ctx->entry); + } + + return true; +} + +bool lander_stream_body_to_client(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + if ((c_ctx->entry == NULL) || + (ctx->res.body.expected_len == ctx->res.body.len)) { + return true; + } + + uint64_t to_write = MIN(EVENT_LOOP_BUFFER_SIZE - conn->wbuf_size, + ctx->res.body.expected_len - ctx->res.body.len); + + uint64_t read = 0; + lsm_entry_data_read(&read, (char *)&conn->wbuf[conn->wbuf_size], c_ctx->entry, + to_write); + + ctx->res.body.len += read; + conn->wbuf_size += read; + + return false; +} diff --git a/src/lander/lander_post.c b/src/lander/lander_post.c index 9288929..da9d1c4 100644 --- a/src/lander/lander_post.c +++ b/src/lander/lander_post.c @@ -1,6 +1,16 @@ #include "http/res.h" +#include "http/types.h" #include "lander.h" #include "log.h" +#include "lsm/store.h" + +static void randomize_key(char *key, int len) { + for (int i = 0; i < len; i++) { + key[i] = charset[rand() % charset_len]; + } + + key[len] = '\0'; +} // TODO entry leaks if key is already present static bool add_entry(char **key_ptr, int *key_len_ptr, http_loop_ctx *ctx, @@ -60,6 +70,110 @@ static bool add_entry(char **key_ptr, int *key_len_ptr, http_loop_ctx *ctx, return true; } +/** + * Insert a new entry into the store. 
+ * + * @return true on success, false otherwise + */ +bool lander_insert_entry(http_loop_ctx *ctx) { + http_loop_gctx *gctx = ctx->g; + lander_gctx *c_gctx = gctx->c; + lander_ctx *c_ctx = ctx->c; + + lsm_str *key; + int key_len; + + if (ctx->req.regex_groups[2].rm_eo == ctx->req.regex_groups[2].rm_so) { + // Generate a random key to insert + bool secure = + (ctx->req.regex_groups[1].rm_eo - ctx->req.regex_groups[1].rm_so) == 1; + key_len = secure ? 16 : 4; + char *key_s = malloc((key_len + 1) * sizeof(char)); + + randomize_key(key_s, key_len); + lsm_str_init(&key, key_s); + } else { + char *key_s = (char *)&ctx->req.path[ctx->req.regex_groups[2].rm_so]; + key_len = ctx->req.regex_groups[2].rm_eo - ctx->req.regex_groups[2].rm_so; + + lsm_str_init_copy_n(&key, key_s, key_len); + } + + // TODO free key on error + switch (lsm_store_insert(&c_ctx->entry, c_gctx->store, key)) { + case lsm_error_already_present: + ctx->res.status = http_conflict; + return false; + case lsm_error_ok: + break; + default: + ctx->res.status = http_internal_server_error; + return false; + } + + // Add location header + char *buf = malloc(key_len + 2); + memcpy(&buf[1], lsm_str_ptr(key), key_len); + buf[0] = '/'; + buf[key_len + 1] = '\0'; + + http_res_add_header(&ctx->res, http_header_location, buf, true); + ctx->res.status = http_created; + + return true; +} + +bool lander_post_redirect_lsm(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + if (!lander_insert_entry(ctx)) { + conn->state = event_loop_conn_state_res; + return true; + } + + lsm_entry_attr_insert_num(c_ctx->entry, lsm_attr_type_entry_type, + lander_entry_type_redirect); + + return true; +} + +bool lander_post_paste_lsm(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + if (!lander_insert_entry(ctx)) { + conn->state = event_loop_conn_state_res; + return true; + } + + lsm_entry_attr_insert_num(c_ctx->entry, lsm_attr_type_entry_type, + 
lander_entry_type_paste); + + return true; +} + +bool lander_stream_body_to_entry(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + http_loop_gctx *gctx = ctx->g; + lander_gctx *c_gctx = gctx->c; + + uint64_t to_append = + MIN(conn->rbuf_size - conn->rbuf_read, + ctx->req.body.expected_len - lsm_entry_data_len(c_ctx->entry)); + + lsm_str *data; + lsm_str_init_copy_n(&data, (char *)&conn->rbuf[conn->rbuf_read], to_append); + lsm_entry_data_append(c_gctx->store, c_ctx->entry, data); + + conn->rbuf_read += to_append; + + lsm_str_free(data); + + return lsm_entry_data_len(c_ctx->entry) == ctx->req.body.expected_len; +} + bool lander_post_redirect(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; bool random = diff --git a/src/main.c b/src/main.c index cbefd01..6d69baf 100644 --- a/src/main.c +++ b/src/main.c @@ -49,6 +49,11 @@ int main() { c_gctx->data_dir = data_dir; c_gctx->trie = trie; + lsm_str *db_path, *data_dir2; + lsm_str_init_copy(&db_path, "data/store.db"); + lsm_str_init_copy(&data_dir2, "data"); + lsm_store_load(&c_gctx->store, db_path, data_dir2); + http_loop *hl = http_loop_init( lander_routes, sizeof(lander_routes) / sizeof(lander_routes[0]), c_gctx, lander_ctx_init, (void (*)(void *))lander_ctx_reset, From 72fae76ef6239e4a8cd4f0e07cabcafd859c81ff Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Fri, 3 Nov 2023 14:41:55 +0100 Subject: [PATCH 28/70] fix(http): don't segfault on non-routed requests --- include/http_loop.h | 2 ++ src/http_loop/http_loop.c | 3 +++ src/http_loop/http_loop_res.c | 9 ++++++--- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/http_loop.h b/include/http_loop.h index 4737301..fe13382 100644 --- a/include/http_loop.h +++ b/include/http_loop.h @@ -34,6 +34,8 @@ typedef enum http_route_type { */ typedef bool (*step)(event_loop_conn *conn); +extern const step http_default_res_steps[HTTP_LOOP_MAX_STEPS]; + /** * Struct describing a route a request can take. 
*/ diff --git a/src/http_loop/http_loop.c b/src/http_loop/http_loop.c index dc76061..d65ea1c 100644 --- a/src/http_loop/http_loop.c +++ b/src/http_loop/http_loop.c @@ -4,6 +4,9 @@ #include "http_loop.h" #include "log.h" +const step http_default_res_steps[HTTP_LOOP_MAX_STEPS] = { + http_loop_step_write_header, http_loop_step_write_body, NULL}; + bool http_loop_handle_request(event_loop_conn *conn) { // Prevents the request handler function from looping indefinitely without // ever consuming new data diff --git a/src/http_loop/http_loop_res.c b/src/http_loop/http_loop_res.c index 7cbf9d3..cb372da 100644 --- a/src/http_loop/http_loop_res.c +++ b/src/http_loop/http_loop_res.c @@ -105,9 +105,12 @@ bool http_loop_step_write_body(event_loop_conn *conn) { void http_loop_handle_response(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; + // Non-routed requests also need to be processed + const step *steps = + ctx->route != NULL ? ctx->route->steps_res : http_default_res_steps; + while ((conn->state == event_loop_conn_state_res) && - (ctx->route->steps_res[ctx->current_step] != NULL) && - ctx->route->steps_res[ctx->current_step](conn)) { + (steps[ctx->current_step] != NULL) && steps[ctx->current_step](conn)) { ctx->current_step++; } @@ -115,7 +118,7 @@ void http_loop_handle_response(event_loop_conn *conn) { // After response processing has finished its work, we reset the context to // prepare for a new request if ((conn->state != event_loop_conn_state_res) || - (ctx->route->steps_res[ctx->current_step] == NULL)) { + (steps[ctx->current_step] == NULL)) { http_loop_ctx_reset(ctx); conn->state = event_loop_conn_state_req; From eb9157281b478b510e039a3eb8be41dfc64787cc Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Fri, 3 Nov 2023 15:02:03 +0100 Subject: [PATCH 29/70] chore(http): rename step to http_step --- include/http_loop.h | 8 ++++---- src/http_loop/http_loop.c | 2 +- src/http_loop/http_loop_res.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git 
a/include/http_loop.h b/include/http_loop.h index fe13382..e05ee22 100644 --- a/include/http_loop.h +++ b/include/http_loop.h @@ -32,9 +32,9 @@ typedef enum http_route_type { * before the step can finish its processing. For response steps, `false` means * there's new data in the write buffer that needs to be written. */ -typedef bool (*step)(event_loop_conn *conn); +typedef bool (*http_step)(event_loop_conn *conn); -extern const step http_default_res_steps[HTTP_LOOP_MAX_STEPS]; +extern const http_step http_default_res_steps[HTTP_LOOP_MAX_STEPS]; /** * Struct describing a route a request can take. @@ -46,8 +46,8 @@ typedef struct http_route { // Compiled regex for a regex route. This value gets set at runtime when // starting the http loop regex_t *regex; - step steps[HTTP_LOOP_MAX_STEPS]; - step steps_res[HTTP_LOOP_MAX_STEPS]; + const http_step steps[HTTP_LOOP_MAX_STEPS]; + const http_step steps_res[HTTP_LOOP_MAX_STEPS]; } http_route; /** diff --git a/src/http_loop/http_loop.c b/src/http_loop/http_loop.c index d65ea1c..964992d 100644 --- a/src/http_loop/http_loop.c +++ b/src/http_loop/http_loop.c @@ -4,7 +4,7 @@ #include "http_loop.h" #include "log.h" -const step http_default_res_steps[HTTP_LOOP_MAX_STEPS] = { +const http_step http_default_res_steps[HTTP_LOOP_MAX_STEPS] = { http_loop_step_write_header, http_loop_step_write_body, NULL}; bool http_loop_handle_request(event_loop_conn *conn) { diff --git a/src/http_loop/http_loop_res.c b/src/http_loop/http_loop_res.c index cb372da..fec7cd9 100644 --- a/src/http_loop/http_loop_res.c +++ b/src/http_loop/http_loop_res.c @@ -106,7 +106,7 @@ void http_loop_handle_response(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; // Non-routed requests also need to be processed - const step *steps = + const http_step *steps = ctx->route != NULL ? 
ctx->route->steps_res : http_default_res_steps; while ((conn->state == event_loop_conn_state_res) && From 46f89059e4f7ba79ba98a20ab619c711d2812122 Mon Sep 17 00:00:00 2001 From: Jef Roosens Date: Tue, 7 Nov 2023 17:43:15 +0100 Subject: [PATCH 30/70] feat(lsm): start of on-disk database --- lsm/example/test.c | 9 +- lsm/include/lsm/store.h | 11 ++- lsm/src/_include/lsm/store_internal.h | 19 ++++- lsm/src/store/lsm_store.c | 42 +++++++++- lsm/src/store/lsm_store_sync.c | 116 ++++++++++++++++++++++++++ src/main.c | 25 +++--- 6 files changed, 199 insertions(+), 23 deletions(-) create mode 100644 lsm/src/store/lsm_store_sync.c diff --git a/lsm/example/test.c b/lsm/example/test.c index 2a7e3d3..1445720 100644 --- a/lsm/example/test.c +++ b/lsm/example/test.c @@ -6,12 +6,11 @@ #include "lsm/str.h" int main() { - lsm_str *db_path, *data_dir; - lsm_str_init_copy(&db_path, "data/data.db"); + lsm_str *data_dir; lsm_str_init_copy(&data_dir, "data"); lsm_store *store; - lsm_store_load(&store, db_path, data_dir); + lsm_store_load(&store, data_dir); lsm_str *key; lsm_str_init_copy(&key, "key"); @@ -26,6 +25,10 @@ int main() { lsm_entry_data_append(store, handle, data); } + if (lsm_entry_sync(store, handle) != lsm_error_ok) { + printf("godver"); + return 1; + } lsm_entry_close(handle); assert(lsm_store_open_read(&handle, store, key) == lsm_error_ok); diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index 1557dd1..7518059 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -104,10 +104,9 @@ lsm_error lsm_store_init(lsm_store **ptr); * Open the given database file and load it into a new store object. * * @param ptr pointer to store newly allocated store - * @param db_path path to the database file * @param data_path path to the data directory */ -lsm_error lsm_store_load(lsm_store **ptr, lsm_str *db_path, lsm_str *data_path); +lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path); /** * Dealocate an existing lsm_store object. 
@@ -193,6 +192,14 @@ lsm_error lsm_entry_data_append_raw(lsm_store *store, lsm_entry_handle *handle, lsm_error lsm_entry_data_read(uint64_t *out, char *buf, lsm_entry_handle *handle, uint64_t len); +/** + * Persist the entry's data to disk. + * + * @param store store to persist entry in + * @param handle handle to entry to persist + */ +lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle); + /** * Return the length of the entry's data. * diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index c8bad4c..d45fc36 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -8,6 +8,9 @@ #include "lsm/str_internal.h" #include "lsm/trie.h" +#define LSM_DB_FILE_NAME "lsm.db" +#define LSM_IDX_FILE_NAME "lsm.idx" + typedef struct lsm_attr { lsm_attr_type type; lsm_str *str; @@ -70,7 +73,21 @@ lsm_error lsm_entry_handle_init(lsm_entry_handle **out); struct lsm_store { lsm_trie *trie; lsm_str *data_path; - lsm_str *db_path; + FILE *db_file; + uint64_t db_file_size; + pthread_mutex_t db_lock; + FILE *idx_file; + uint64_t idx_file_size; + pthread_mutex_t idx_lock; }; +/** + * Read in the database and construct the in-memory trie index. This function + * assumes the provided store is a newly initialized empty store with the + * database files opened. 
+ * + * @param store store to read + */ +lsm_error lsm_store_load_db(lsm_store *store); + #endif diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index d056621..e2d62cc 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -22,20 +22,54 @@ lsm_error lsm_store_init(lsm_store **ptr) { return res; } + pthread_mutex_init(&store->db_lock, NULL); + pthread_mutex_init(&store->idx_lock, NULL); + *ptr = store; return lsm_error_ok; } -lsm_error lsm_store_load(lsm_store **ptr, lsm_str *db_path, - lsm_str *data_path) { +lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { lsm_store *store; LSM_RES(lsm_store_init(&store)); - // TODO implement all of reading the db file + // Try to open an existing db file or create a new one otherwise + // This shit is why I need to improve the str library + char db_file_path[lsm_str_len(data_path) + strlen(LSM_DB_FILE_NAME) + 2]; + memcpy(db_file_path, lsm_str_ptr(data_path), lsm_str_len(data_path) * sizeof(char)); + sprintf(&db_file_path[lsm_str_len(data_path)], "/%s", LSM_DB_FILE_NAME); + + FILE *db_file = fopen(db_file_path, "r+b"); + + if (db_file == NULL) { + db_file = fopen(db_file_path, "wb"); + + if (db_file == NULL) { + return lsm_error_failed_io; + } + } + + // Same for idx file + char idx_file_path[lsm_str_len(data_path) + strlen(LSM_IDX_FILE_NAME) + 2]; + memcpy(idx_file_path, lsm_str_ptr(data_path), lsm_str_len(data_path) * sizeof(char)); + sprintf(&idx_file_path[lsm_str_len(data_path)], "/%s", LSM_IDX_FILE_NAME); + + FILE *idx_file = fopen(idx_file_path, "r+b"); + + if (idx_file == NULL) { + idx_file = fopen(idx_file_path, "wb"); + + if (idx_file == NULL) { + return lsm_error_failed_io; + } + } + + LSM_RES(lsm_store_load_db(store)); - store->db_path = db_path; store->data_path = data_path; + store->db_file = db_file; + store->idx_file = idx_file; *ptr = store; diff --git a/lsm/src/store/lsm_store_sync.c b/lsm/src/store/lsm_store_sync.c new file mode 100644 index 0000000..49f6d61 
--- /dev/null +++ b/lsm/src/store/lsm_store_sync.c @@ -0,0 +1,116 @@ +#include "lsm/store_internal.h" +#include + +static lsm_error lsm_entry_write_uint64_t(FILE *f, uint64_t num) { + size_t res = fwrite(&num, sizeof(uint64_t), 1, f); + + // Such a small write should succeed in one go + if (res == 0) { + return lsm_error_failed_io; + } + + return lsm_error_ok; +} + +static lsm_error lsm_entry_write_str(FILE *f, lsm_str *s) { + uint64_t to_write = lsm_str_len(s); + uint64_t written = 0; + + do { + written += fwrite(lsm_str_ptr(s), sizeof(char), to_write - written, f); + } while (written < to_write); + + return lsm_error_ok; +} + +lsm_error lsm_entry_write_db(uint64_t *size, FILE *db_file, lsm_entry *entry) { + // First we write how many attributes follow + LSM_RES(lsm_entry_write_uint64_t(db_file, entry->attrs.count)); + *size = sizeof(uint64_t); + + for (uint64_t i = 0; i < entry->attrs.count; i++) { + // Write attribute type, length & value + LSM_RES(lsm_entry_write_uint64_t(db_file, entry->attrs.items[i].type)); + LSM_RES(lsm_entry_write_uint64_t(db_file, lsm_str_len(entry->attrs.items[i].str))); + LSM_RES(lsm_entry_write_str(db_file, entry->attrs.items[i].str)); + + *size += 2 * sizeof(uint64_t) + lsm_str_len(entry->attrs.items[i].str) * sizeof(char); + } + + printf("db size: %lu\n", *size); + + return lsm_error_ok; +} + +lsm_error lsm_entry_write_idx(uint64_t *size, FILE *idx_file, lsm_entry *entry, uint64_t offset, uint64_t len) { + LSM_RES(lsm_entry_write_uint64_t(idx_file, lsm_str_len(entry->key))); + LSM_RES(lsm_entry_write_str(idx_file, entry->key)); + LSM_RES(lsm_entry_write_uint64_t(idx_file, offset)); + LSM_RES(lsm_entry_write_uint64_t(idx_file, len)); + + *size = 3 * sizeof(uint64_t) + lsm_str_len(entry->key) * sizeof(char); + + return lsm_error_ok; +} + +lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { + pthread_mutex_lock(&store->db_lock); + + // Append entry to end of database file + if (fseek(store->db_file, SEEK_SET, 
store->db_file_size) != 0) { + pthread_mutex_unlock(&store->db_lock); + + return lsm_error_failed_io; + } + + uint64_t entry_size; + lsm_error res = lsm_entry_write_db(&entry_size, store->db_file, handle->wrapper->entry); + fflush(store->db_file); + + // TODO fsync db file? + + if (res != lsm_error_ok) { + pthread_mutex_unlock(&store->db_lock); + + return res; + } + + uint64_t entry_index = store->db_file_size; + store->db_file_size += entry_size; + + pthread_mutex_unlock(&store->db_lock); + + // Append entry to index file + pthread_mutex_lock(&store->idx_lock); + + if (fseek(store->idx_file, SEEK_SET, store->idx_file_size) != 0) { + pthread_mutex_unlock(&store->idx_lock); + + return lsm_error_failed_io; + } + + res = lsm_entry_write_idx(&entry_size, store->idx_file, handle->wrapper->entry, entry_index, entry_size); + fflush(store->idx_file); + + if (res == lsm_error_ok) { + store->idx_file_size += entry_size; + } + + pthread_mutex_unlock(&store->idx_lock); + + return res; +} + +lsm_error lsm_store_load_db(lsm_store *store) { + uint64_t key_len; + size_t res; + lsm_str *key; + + while (feof(store->idx_file) > 0) { + res = fread(&key_len, sizeof(uint64_t), 1, store->idx_file); + + if (res == 0) { + return lsm_error_failed_io; + } + } +} diff --git a/src/main.c b/src/main.c index 6d69baf..f552081 100644 --- a/src/main.c +++ b/src/main.c @@ -31,28 +31,27 @@ int main() { critical(1, "Invalid TCP port %s", port_str); } - char file_path[strlen(data_dir) + 12 + 1]; - sprintf(file_path, "%s/lander.data", data_dir); + /* char file_path[strlen(data_dir) + 12 + 1]; */ + /* sprintf(file_path, "%s/lander.data", data_dir); */ - info("Initializing trie from file '%s'", file_path); + /* info("Initializing trie from file '%s'", file_path); */ - Trie *trie; - TrieExitCode res = trie_init(&trie, file_path); + /* Trie *trie; */ + /* TrieExitCode res = trie_init(&trie, file_path); */ - if (res != Ok) { - critical(1, "An error occured while populating the trie."); - } + /* if (res != 
Ok) { */ + /* critical(1, "An error occured while populating the trie."); */ + /* } */ - info("Trie initialized and populated with %i entries", trie_size(trie)); + /* info("Trie initialized and populated with %i entries", trie_size(trie)); */ lander_gctx *c_gctx = lander_gctx_init(); c_gctx->data_dir = data_dir; - c_gctx->trie = trie; + /* c_gctx->trie = trie; */ - lsm_str *db_path, *data_dir2; - lsm_str_init_copy(&db_path, "data/store.db"); + lsm_str *data_dir2; lsm_str_init_copy(&data_dir2, "data"); - lsm_store_load(&c_gctx->store, db_path, data_dir2); + lsm_store_load(&c_gctx->store, data_dir2); http_loop *hl = http_loop_init( lander_routes, sizeof(lander_routes) / sizeof(lander_routes[0]), c_gctx, From 38e9496717957319f9f15a198396ffdcd1f8958a Mon Sep 17 00:00:00 2001 From: Jef Roosens Date: Tue, 7 Nov 2023 23:00:22 +0100 Subject: [PATCH 31/70] feat(lsm): possibly added reading db file on load --- lsm/example/test.c | 6 +- lsm/src/_include/lsm/store_internal.h | 3 + lsm/src/store/lsm_store.c | 14 ++- lsm/src/store/lsm_store_sync.c | 126 ++++++++++++++++++++++++-- 4 files changed, 138 insertions(+), 11 deletions(-) diff --git a/lsm/example/test.c b/lsm/example/test.c index 1445720..e1e3b69 100644 --- a/lsm/example/test.c +++ b/lsm/example/test.c @@ -10,13 +10,17 @@ int main() { lsm_str_init_copy(&data_dir, "data"); lsm_store *store; - lsm_store_load(&store, data_dir); + assert(lsm_store_load(&store, data_dir) == lsm_error_ok); lsm_str *key; lsm_str_init_copy(&key, "key"); lsm_entry_handle *handle; assert(lsm_store_insert(&handle, store, key) == lsm_error_ok); + + lsm_str *attr; + lsm_str_init_copy(&attr, "some attribute value"); + lsm_entry_attr_insert(handle, lsm_attr_type_content_type, attr); lsm_str *data; lsm_str_init_copy(&data, "hello"); diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index d45fc36..b9546e9 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -73,10 
+73,13 @@ lsm_error lsm_entry_handle_init(lsm_entry_handle **out); struct lsm_store { lsm_trie *trie; lsm_str *data_path; + FILE *db_file; uint64_t db_file_size; pthread_mutex_t db_lock; + FILE *idx_file; + uint64_t idx_file_block_count; uint64_t idx_file_size; pthread_mutex_t idx_lock; }; diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index e2d62cc..172c6d2 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -63,14 +63,24 @@ lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { if (idx_file == NULL) { return lsm_error_failed_io; } - } - LSM_RES(lsm_store_load_db(store)); + // The database code expects the idx file to start with how many blocks it + // contains, so we write that here + uint64_t num = 0; + + if (fwrite(&num, sizeof(uint64_t), 1, idx_file) == 0) { + return lsm_error_failed_io; + } + + fflush(idx_file); + } store->data_path = data_path; store->db_file = db_file; store->idx_file = idx_file; + LSM_RES(lsm_store_load_db(store)); + *ptr = store; return lsm_error_ok; diff --git a/lsm/src/store/lsm_store_sync.c b/lsm/src/store/lsm_store_sync.c index 49f6d61..39e7658 100644 --- a/lsm/src/store/lsm_store_sync.c +++ b/lsm/src/store/lsm_store_sync.c @@ -1,5 +1,9 @@ -#include "lsm/store_internal.h" +#include #include +#include + +#include "lsm/store.h" +#include "lsm/store_internal.h" static lsm_error lsm_entry_write_uint64_t(FILE *f, uint64_t num) { size_t res = fwrite(&num, sizeof(uint64_t), 1, f); @@ -37,8 +41,6 @@ lsm_error lsm_entry_write_db(uint64_t *size, FILE *db_file, lsm_entry *entry) { *size += 2 * sizeof(uint64_t) + lsm_str_len(entry->attrs.items[i].str) * sizeof(char); } - printf("db size: %lu\n", *size); - return lsm_error_ok; } @@ -57,7 +59,7 @@ lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { pthread_mutex_lock(&store->db_lock); // Append entry to end of database file - if (fseek(store->db_file, SEEK_SET, store->db_file_size) != 0) { + if (fseek(store->db_file, 
store->db_file_size, SEEK_SET) != 0) { pthread_mutex_unlock(&store->db_lock); return lsm_error_failed_io; @@ -83,34 +85,142 @@ lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { // Append entry to index file pthread_mutex_lock(&store->idx_lock); - if (fseek(store->idx_file, SEEK_SET, store->idx_file_size) != 0) { + if (fseek(store->idx_file, store->idx_file_size, SEEK_SET) != 0) { + printf("failed seek, %lu\n", store->idx_file_size); pthread_mutex_unlock(&store->idx_lock); return lsm_error_failed_io; } res = lsm_entry_write_idx(&entry_size, store->idx_file, handle->wrapper->entry, entry_index, entry_size); - fflush(store->idx_file); if (res == lsm_error_ok) { + // Update the counter at the beginning of the file + uint64_t new_block_count = store->idx_file_block_count + 1; + + if (fseek(store->idx_file, 0, SEEK_SET) != 0) { + pthread_mutex_unlock(&store->idx_lock); + + return lsm_error_failed_io; + } + + size_t r = fwrite(&new_block_count, sizeof(uint64_t), 1, store->idx_file); + + if (r != lsm_error_ok) { + printf("wuck\n"); + pthread_mutex_unlock(&store->idx_lock); + + return res; + } + store->idx_file_size += entry_size; + store->idx_file_block_count = new_block_count; + } else { + printf("failed write\n"); } + fflush(store->idx_file); + pthread_mutex_unlock(&store->idx_lock); return res; } +static lsm_error lsm_entry_read_attrs(lsm_entry_handle *handle, FILE *db_file) { + uint64_t attr_count; + size_t res = fread(&attr_count, sizeof(uint64_t), 1, db_file); + + if (res == 0) { + return lsm_error_failed_io; + } + + // attr_type, val_len + uint64_t nums[2]; + lsm_str *val; + + for (uint64_t i = 0; i < attr_count; i++) { + res = fread(nums, sizeof(uint64_t), 2, db_file); + + if (res < 2) { + return lsm_error_failed_io; + } + + char *val_s = malloc(nums[1] + 1); + val_s[nums[1]] = '\0'; + + if (val_s == NULL) { + return lsm_error_failed_alloc; + } + + uint64_t read = 0; + + while (read < nums[1]) { + read += fread(&val_s[read], 1, nums[1] - read, 
db_file); + } + + LSM_RES(lsm_str_init(&val, val_s));; + lsm_entry_attr_insert(handle, nums[0], val); + } + + return lsm_error_ok; +} + lsm_error lsm_store_load_db(lsm_store *store) { uint64_t key_len; - size_t res; + uint64_t db_dim[2]; lsm_str *key; + lsm_entry_handle *handle; - while (feof(store->idx_file) > 0) { + // idx file starts with block count + size_t res = fread(&store->idx_file_block_count, sizeof(uint64_t), 1, store->idx_file); + + if (res == 0) { + return lsm_error_failed_io; + } + + store->idx_file_size += sizeof(uint64_t); + + for (uint64_t i = 0; i < store->idx_file_block_count; i++) { + // Read in idx metadata res = fread(&key_len, sizeof(uint64_t), 1, store->idx_file); if (res == 0) { return lsm_error_failed_io; } + + char *key_s = malloc(key_len + 1); + key_s[key_len] = '\0'; + + if (key_s == NULL) { + return lsm_error_failed_alloc; + } + + res = fread(key_s, 1, key_len, store->idx_file); + + if (res < key_len) { + return lsm_error_failed_io; + } + + res = fread(db_dim, sizeof(uint64_t), 2, store->idx_file); + + if (res < 2) { + return lsm_error_failed_io; + } + + LSM_RES(lsm_str_init(&key, key_s)); + LSM_RES(lsm_store_insert(&handle, store, key)); + + // Read attributes from database file + if (fseek(store->db_file, db_dim[0], SEEK_SET) != 0) { + return lsm_error_failed_io; + } + + LSM_RES(lsm_entry_read_attrs(handle, store->db_file)); + lsm_entry_close(handle); + + store->idx_file_size += 3 * sizeof(uint64_t) + key_len; + store->db_file_size += db_dim[1]; } + + return lsm_error_ok; } From 719a65beff9ef6a6e23d30180b9c9420cd18231d Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Wed, 8 Nov 2023 08:47:24 +0100 Subject: [PATCH 32/70] chore(lsm): format code --- lsm/example/test.c | 2 +- lsm/include/lsm/store.h | 9 ++++++--- lsm/src/store/lsm_store.c | 10 ++++++---- lsm/src/store/lsm_store_entry.c | 9 ++++++--- lsm/src/store/lsm_store_sync.c | 25 ++++++++++++++++--------- 5 files changed, 35 insertions(+), 20 deletions(-) diff --git 
a/lsm/example/test.c b/lsm/example/test.c index e1e3b69..bd78f2e 100644 --- a/lsm/example/test.c +++ b/lsm/example/test.c @@ -17,7 +17,7 @@ int main() { lsm_entry_handle *handle; assert(lsm_store_insert(&handle, store, key) == lsm_error_ok); - + lsm_str *attr; lsm_str_init_copy(&attr, "some attribute value"); lsm_entry_attr_insert(handle, lsm_attr_type_content_type, attr); diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index 7518059..72334c6 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -52,7 +52,8 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, * @param entry entry to search for * @param type type of attribute to return */ -lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, lsm_attr_type type); +lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, + lsm_attr_type type); /** * Add a new attribute to the entry. @@ -72,7 +73,8 @@ lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, lsm_attr_type type, * @param type type of attribute to add * @param data data of attribute */ -lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, lsm_attr_type type, uint64_t data); +lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, + lsm_attr_type type, uint64_t data); /** * Remove an atribute from the given entry, if present. @@ -178,7 +180,8 @@ lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, * @param data data to append * @param len length of data array */ -lsm_error lsm_entry_data_append_raw(lsm_store *store, lsm_entry_handle *handle, char *data, uint64_t len); +lsm_error lsm_entry_data_append_raw(lsm_store *store, lsm_entry_handle *handle, + char *data, uint64_t len); /** * Read a number of bytes from the entry's data field. 
The position from which diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 172c6d2..9c0ab83 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -37,7 +37,8 @@ lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { // Try to open an existing db file or create a new one otherwise // This shit is why I need to improve the str library char db_file_path[lsm_str_len(data_path) + strlen(LSM_DB_FILE_NAME) + 2]; - memcpy(db_file_path, lsm_str_ptr(data_path), lsm_str_len(data_path) * sizeof(char)); + memcpy(db_file_path, lsm_str_ptr(data_path), + lsm_str_len(data_path) * sizeof(char)); sprintf(&db_file_path[lsm_str_len(data_path)], "/%s", LSM_DB_FILE_NAME); FILE *db_file = fopen(db_file_path, "r+b"); @@ -52,7 +53,8 @@ lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { // Same for idx file char idx_file_path[lsm_str_len(data_path) + strlen(LSM_IDX_FILE_NAME) + 2]; - memcpy(idx_file_path, lsm_str_ptr(data_path), lsm_str_len(data_path) * sizeof(char)); + memcpy(idx_file_path, lsm_str_ptr(data_path), + lsm_str_len(data_path) * sizeof(char)); sprintf(&idx_file_path[lsm_str_len(data_path)], "/%s", LSM_IDX_FILE_NAME); FILE *idx_file = fopen(idx_file_path, "r+b"); @@ -200,7 +202,8 @@ lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, // If a key was previously removed from the trie, the wrapper will already be // present in the trie - if (lsm_trie_search((void **)&wrapper, store->trie, key) == lsm_error_not_found) { + if (lsm_trie_search((void **)&wrapper, store->trie, key) == + lsm_error_not_found) { LSM_RES(lsm_entry_wrapper_init(&wrapper)); pthread_rwlock_wrlock(&wrapper->lock); @@ -222,7 +225,6 @@ lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, } } - lsm_entry *entry; LSM_RES(lsm_entry_init(&entry)); diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index d7bbc40..2878996 100644 --- a/lsm/src/store/lsm_store_entry.c +++ 
b/lsm/src/store/lsm_store_entry.c @@ -78,7 +78,8 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, return lsm_error_ok; } -lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, lsm_attr_type type) { +lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, + lsm_attr_type type) { lsm_str *s; LSM_RES(lsm_entry_attr_get(&s, handle, type)); @@ -167,9 +168,11 @@ lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, lsm_attr_type type, return lsm_error_ok; } -lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, lsm_attr_type type, uint64_t data) { +lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, + lsm_attr_type type, uint64_t data) { lsm_str *s; - LSM_RES(lsm_str_init_copy_n(&s, (char *)&data, sizeof(uint64_t) / sizeof(char))); + LSM_RES( + lsm_str_init_copy_n(&s, (char *)&data, sizeof(uint64_t) / sizeof(char))); return lsm_entry_attr_insert(handle, type, s); } diff --git a/lsm/src/store/lsm_store_sync.c b/lsm/src/store/lsm_store_sync.c index 39e7658..caaa42b 100644 --- a/lsm/src/store/lsm_store_sync.c +++ b/lsm/src/store/lsm_store_sync.c @@ -35,16 +35,19 @@ lsm_error lsm_entry_write_db(uint64_t *size, FILE *db_file, lsm_entry *entry) { for (uint64_t i = 0; i < entry->attrs.count; i++) { // Write attribute type, length & value LSM_RES(lsm_entry_write_uint64_t(db_file, entry->attrs.items[i].type)); - LSM_RES(lsm_entry_write_uint64_t(db_file, lsm_str_len(entry->attrs.items[i].str))); + LSM_RES(lsm_entry_write_uint64_t(db_file, + lsm_str_len(entry->attrs.items[i].str))); LSM_RES(lsm_entry_write_str(db_file, entry->attrs.items[i].str)); - *size += 2 * sizeof(uint64_t) + lsm_str_len(entry->attrs.items[i].str) * sizeof(char); + *size += 2 * sizeof(uint64_t) + + lsm_str_len(entry->attrs.items[i].str) * sizeof(char); } return lsm_error_ok; } -lsm_error lsm_entry_write_idx(uint64_t *size, FILE *idx_file, lsm_entry *entry, uint64_t offset, uint64_t len) { +lsm_error lsm_entry_write_idx(uint64_t 
*size, FILE *idx_file, lsm_entry *entry, + uint64_t offset, uint64_t len) { LSM_RES(lsm_entry_write_uint64_t(idx_file, lsm_str_len(entry->key))); LSM_RES(lsm_entry_write_str(idx_file, entry->key)); LSM_RES(lsm_entry_write_uint64_t(idx_file, offset)); @@ -66,7 +69,8 @@ lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { } uint64_t entry_size; - lsm_error res = lsm_entry_write_db(&entry_size, store->db_file, handle->wrapper->entry); + lsm_error res = + lsm_entry_write_db(&entry_size, store->db_file, handle->wrapper->entry); fflush(store->db_file); // TODO fsync db file? @@ -76,7 +80,7 @@ lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { return res; } - + uint64_t entry_index = store->db_file_size; store->db_file_size += entry_size; @@ -92,7 +96,8 @@ lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { return lsm_error_failed_io; } - res = lsm_entry_write_idx(&entry_size, store->idx_file, handle->wrapper->entry, entry_index, entry_size); + res = lsm_entry_write_idx(&entry_size, store->idx_file, + handle->wrapper->entry, entry_index, entry_size); if (res == lsm_error_ok) { // Update the counter at the beginning of the file @@ -151,14 +156,15 @@ static lsm_error lsm_entry_read_attrs(lsm_entry_handle *handle, FILE *db_file) { if (val_s == NULL) { return lsm_error_failed_alloc; } - + uint64_t read = 0; while (read < nums[1]) { read += fread(&val_s[read], 1, nums[1] - read, db_file); } - LSM_RES(lsm_str_init(&val, val_s));; + LSM_RES(lsm_str_init(&val, val_s)); + ; lsm_entry_attr_insert(handle, nums[0], val); } @@ -172,7 +178,8 @@ lsm_error lsm_store_load_db(lsm_store *store) { lsm_entry_handle *handle; // idx file starts with block count - size_t res = fread(&store->idx_file_block_count, sizeof(uint64_t), 1, store->idx_file); + size_t res = + fread(&store->idx_file_block_count, sizeof(uint64_t), 1, store->idx_file); if (res == 0) { return lsm_error_failed_io; From e10c43dfd6e1cc8b8ede0eb7e2a72f5295835fd9 Mon Sep 17 
00:00:00 2001 From: Chewing_Bever Date: Wed, 8 Nov 2023 09:05:38 +0100 Subject: [PATCH 33/70] fix(lsm): work when first creating db --- lsm/src/store/lsm_store.c | 18 ++++++++++++++++++ lsm/src/store/lsm_store_sync.c | 4 ++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 9c0ab83..43eb475 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -44,11 +44,20 @@ lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { FILE *db_file = fopen(db_file_path, "r+b"); if (db_file == NULL) { + // Create the file first, then reopen it in extended read db_file = fopen(db_file_path, "wb"); if (db_file == NULL) { return lsm_error_failed_io; } + + fclose(db_file); + + FILE *db_file = fopen(db_file_path, "r+b"); + + if (db_file == NULL) { + return lsm_error_failed_io; + } } // Same for idx file @@ -60,6 +69,7 @@ lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { FILE *idx_file = fopen(idx_file_path, "r+b"); if (idx_file == NULL) { + // Create the file first idx_file = fopen(idx_file_path, "wb"); if (idx_file == NULL) { @@ -75,6 +85,14 @@ lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { } fflush(idx_file); + fclose(idx_file); + + // If opening it in extended read mode still fails now, there's a problem + FILE *idx_file = fopen(idx_file_path, "r+b"); + + if (idx_file == NULL) { + return lsm_error_failed_io; + } } store->data_path = data_path; diff --git a/lsm/src/store/lsm_store_sync.c b/lsm/src/store/lsm_store_sync.c index caaa42b..3a668d4 100644 --- a/lsm/src/store/lsm_store_sync.c +++ b/lsm/src/store/lsm_store_sync.c @@ -73,8 +73,6 @@ lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { lsm_entry_write_db(&entry_size, store->db_file, handle->wrapper->entry); fflush(store->db_file); - // TODO fsync db file? 
- if (res != lsm_error_ok) { pthread_mutex_unlock(&store->db_lock); @@ -177,6 +175,8 @@ lsm_error lsm_store_load_db(lsm_store *store) { lsm_str *key; lsm_entry_handle *handle; + rewind(store->idx_file); + // idx file starts with block count size_t res = fread(&store->idx_file_block_count, sizeof(uint64_t), 1, store->idx_file); From 9c249d40c749d99fd2e785d3dc3da0fd0f41b503 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Wed, 8 Nov 2023 09:11:07 +0100 Subject: [PATCH 34/70] refactor(lsm): better separate store disk functions --- lsm/src/store/lsm_store.c | 76 -------- lsm/src/store/lsm_store_disk_read.c | 183 ++++++++++++++++++ ...sm_store_sync.c => lsm_store_disk_write.c} | 111 ----------- 3 files changed, 183 insertions(+), 187 deletions(-) create mode 100644 lsm/src/store/lsm_store_disk_read.c rename lsm/src/store/{lsm_store_sync.c => lsm_store_disk_write.c} (58%) diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 43eb475..2345cb8 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -30,82 +30,6 @@ lsm_error lsm_store_init(lsm_store **ptr) { return lsm_error_ok; } -lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { - lsm_store *store; - LSM_RES(lsm_store_init(&store)); - - // Try to open an existing db file or create a new one otherwise - // This shit is why I need to improve the str library - char db_file_path[lsm_str_len(data_path) + strlen(LSM_DB_FILE_NAME) + 2]; - memcpy(db_file_path, lsm_str_ptr(data_path), - lsm_str_len(data_path) * sizeof(char)); - sprintf(&db_file_path[lsm_str_len(data_path)], "/%s", LSM_DB_FILE_NAME); - - FILE *db_file = fopen(db_file_path, "r+b"); - - if (db_file == NULL) { - // Create the file first, then reopen it in extended read - db_file = fopen(db_file_path, "wb"); - - if (db_file == NULL) { - return lsm_error_failed_io; - } - - fclose(db_file); - - FILE *db_file = fopen(db_file_path, "r+b"); - - if (db_file == NULL) { - return lsm_error_failed_io; - } - } - - // Same for 
idx file - char idx_file_path[lsm_str_len(data_path) + strlen(LSM_IDX_FILE_NAME) + 2]; - memcpy(idx_file_path, lsm_str_ptr(data_path), - lsm_str_len(data_path) * sizeof(char)); - sprintf(&idx_file_path[lsm_str_len(data_path)], "/%s", LSM_IDX_FILE_NAME); - - FILE *idx_file = fopen(idx_file_path, "r+b"); - - if (idx_file == NULL) { - // Create the file first - idx_file = fopen(idx_file_path, "wb"); - - if (idx_file == NULL) { - return lsm_error_failed_io; - } - - // The database code expects the idx file to start with how many blocks it - // contains, so we write that here - uint64_t num = 0; - - if (fwrite(&num, sizeof(uint64_t), 1, idx_file) == 0) { - return lsm_error_failed_io; - } - - fflush(idx_file); - fclose(idx_file); - - // If opening it in extended read mode still fails now, there's a problem - FILE *idx_file = fopen(idx_file_path, "r+b"); - - if (idx_file == NULL) { - return lsm_error_failed_io; - } - } - - store->data_path = data_path; - store->db_file = db_file; - store->idx_file = idx_file; - - LSM_RES(lsm_store_load_db(store)); - - *ptr = store; - - return lsm_error_ok; -} - lsm_error lsm_store_open_read(lsm_entry_handle **out, lsm_store *store, lsm_str *key) { lsm_entry_wrapper *wrapper; diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c new file mode 100644 index 0000000..4c76b76 --- /dev/null +++ b/lsm/src/store/lsm_store_disk_read.c @@ -0,0 +1,183 @@ +#include +#include + +#include "lsm/store_internal.h" + +lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { + lsm_store *store; + LSM_RES(lsm_store_init(&store)); + + // Try to open an existing db file or create a new one otherwise + // This shit is why I need to improve the str library + char db_file_path[lsm_str_len(data_path) + strlen(LSM_DB_FILE_NAME) + 2]; + memcpy(db_file_path, lsm_str_ptr(data_path), + lsm_str_len(data_path) * sizeof(char)); + sprintf(&db_file_path[lsm_str_len(data_path)], "/%s", LSM_DB_FILE_NAME); + + FILE *db_file = 
fopen(db_file_path, "r+b"); + + if (db_file == NULL) { + // Create the file first, then reopen it in extended read + db_file = fopen(db_file_path, "wb"); + + if (db_file == NULL) { + return lsm_error_failed_io; + } + + fclose(db_file); + + FILE *db_file = fopen(db_file_path, "r+b"); + + if (db_file == NULL) { + return lsm_error_failed_io; + } + } + + // Same for idx file + char idx_file_path[lsm_str_len(data_path) + strlen(LSM_IDX_FILE_NAME) + 2]; + memcpy(idx_file_path, lsm_str_ptr(data_path), + lsm_str_len(data_path) * sizeof(char)); + sprintf(&idx_file_path[lsm_str_len(data_path)], "/%s", LSM_IDX_FILE_NAME); + + FILE *idx_file = fopen(idx_file_path, "r+b"); + + if (idx_file == NULL) { + // Create the file first + idx_file = fopen(idx_file_path, "wb"); + + if (idx_file == NULL) { + return lsm_error_failed_io; + } + + // The database code expects the idx file to start with how many blocks it + // contains, so we write that here + uint64_t num = 0; + + if (fwrite(&num, sizeof(uint64_t), 1, idx_file) == 0) { + return lsm_error_failed_io; + } + + fflush(idx_file); + fclose(idx_file); + + // If opening it in extended read mode still fails now, there's a problem + FILE *idx_file = fopen(idx_file_path, "r+b"); + + if (idx_file == NULL) { + return lsm_error_failed_io; + } + } + + store->data_path = data_path; + store->db_file = db_file; + store->idx_file = idx_file; + + LSM_RES(lsm_store_load_db(store)); + + *ptr = store; + + return lsm_error_ok; +} + +static lsm_error lsm_entry_read_attrs(lsm_entry_handle *handle, FILE *db_file) { + uint64_t attr_count; + size_t res = fread(&attr_count, sizeof(uint64_t), 1, db_file); + + if (res == 0) { + return lsm_error_failed_io; + } + + // attr_type, val_len + uint64_t nums[2]; + lsm_str *val; + + for (uint64_t i = 0; i < attr_count; i++) { + res = fread(nums, sizeof(uint64_t), 2, db_file); + + if (res < 2) { + return lsm_error_failed_io; + } + + char *val_s = malloc(nums[1] + 1); + val_s[nums[1]] = '\0'; + + if (val_s == NULL) { + 
return lsm_error_failed_alloc; + } + + uint64_t read = 0; + + while (read < nums[1]) { + read += fread(&val_s[read], 1, nums[1] - read, db_file); + } + + LSM_RES(lsm_str_init(&val, val_s)); + ; + lsm_entry_attr_insert(handle, nums[0], val); + } + + return lsm_error_ok; +} + +lsm_error lsm_store_load_db(lsm_store *store) { + uint64_t key_len; + uint64_t db_dim[2]; + lsm_str *key; + lsm_entry_handle *handle; + + rewind(store->idx_file); + + // idx file starts with block count + size_t res = + fread(&store->idx_file_block_count, sizeof(uint64_t), 1, store->idx_file); + + if (res == 0) { + return lsm_error_failed_io; + } + + store->idx_file_size += sizeof(uint64_t); + + for (uint64_t i = 0; i < store->idx_file_block_count; i++) { + // Read in idx metadata + res = fread(&key_len, sizeof(uint64_t), 1, store->idx_file); + + if (res == 0) { + return lsm_error_failed_io; + } + + char *key_s = malloc(key_len + 1); + key_s[key_len] = '\0'; + + if (key_s == NULL) { + return lsm_error_failed_alloc; + } + + res = fread(key_s, 1, key_len, store->idx_file); + + if (res < key_len) { + return lsm_error_failed_io; + } + + res = fread(db_dim, sizeof(uint64_t), 2, store->idx_file); + + if (res < 2) { + return lsm_error_failed_io; + } + + LSM_RES(lsm_str_init(&key, key_s)); + LSM_RES(lsm_store_insert(&handle, store, key)); + + // Read attributes from database file + if (fseek(store->db_file, db_dim[0], SEEK_SET) != 0) { + return lsm_error_failed_io; + } + + LSM_RES(lsm_entry_read_attrs(handle, store->db_file)); + lsm_entry_close(handle); + + store->idx_file_size += 3 * sizeof(uint64_t) + key_len; + store->db_file_size += db_dim[1]; + } + + return lsm_error_ok; +} diff --git a/lsm/src/store/lsm_store_sync.c b/lsm/src/store/lsm_store_disk_write.c similarity index 58% rename from lsm/src/store/lsm_store_sync.c rename to lsm/src/store/lsm_store_disk_write.c index 3a668d4..8b02319 100644 --- a/lsm/src/store/lsm_store_sync.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -1,8 +1,3 @@ -#include 
-#include -#include - -#include "lsm/store.h" #include "lsm/store_internal.h" static lsm_error lsm_entry_write_uint64_t(FILE *f, uint64_t num) { @@ -110,7 +105,6 @@ lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { size_t r = fwrite(&new_block_count, sizeof(uint64_t), 1, store->idx_file); if (r != lsm_error_ok) { - printf("wuck\n"); pthread_mutex_unlock(&store->idx_lock); return res; @@ -118,8 +112,6 @@ lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { store->idx_file_size += entry_size; store->idx_file_block_count = new_block_count; - } else { - printf("failed write\n"); } fflush(store->idx_file); @@ -128,106 +120,3 @@ lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { return res; } - -static lsm_error lsm_entry_read_attrs(lsm_entry_handle *handle, FILE *db_file) { - uint64_t attr_count; - size_t res = fread(&attr_count, sizeof(uint64_t), 1, db_file); - - if (res == 0) { - return lsm_error_failed_io; - } - - // attr_type, val_len - uint64_t nums[2]; - lsm_str *val; - - for (uint64_t i = 0; i < attr_count; i++) { - res = fread(nums, sizeof(uint64_t), 2, db_file); - - if (res < 2) { - return lsm_error_failed_io; - } - - char *val_s = malloc(nums[1] + 1); - val_s[nums[1]] = '\0'; - - if (val_s == NULL) { - return lsm_error_failed_alloc; - } - - uint64_t read = 0; - - while (read < nums[1]) { - read += fread(&val_s[read], 1, nums[1] - read, db_file); - } - - LSM_RES(lsm_str_init(&val, val_s)); - ; - lsm_entry_attr_insert(handle, nums[0], val); - } - - return lsm_error_ok; -} - -lsm_error lsm_store_load_db(lsm_store *store) { - uint64_t key_len; - uint64_t db_dim[2]; - lsm_str *key; - lsm_entry_handle *handle; - - rewind(store->idx_file); - - // idx file starts with block count - size_t res = - fread(&store->idx_file_block_count, sizeof(uint64_t), 1, store->idx_file); - - if (res == 0) { - return lsm_error_failed_io; - } - - store->idx_file_size += sizeof(uint64_t); - - for (uint64_t i = 0; i < 
store->idx_file_block_count; i++) { - // Read in idx metadata - res = fread(&key_len, sizeof(uint64_t), 1, store->idx_file); - - if (res == 0) { - return lsm_error_failed_io; - } - - char *key_s = malloc(key_len + 1); - key_s[key_len] = '\0'; - - if (key_s == NULL) { - return lsm_error_failed_alloc; - } - - res = fread(key_s, 1, key_len, store->idx_file); - - if (res < key_len) { - return lsm_error_failed_io; - } - - res = fread(db_dim, sizeof(uint64_t), 2, store->idx_file); - - if (res < 2) { - return lsm_error_failed_io; - } - - LSM_RES(lsm_str_init(&key, key_s)); - LSM_RES(lsm_store_insert(&handle, store, key)); - - // Read attributes from database file - if (fseek(store->db_file, db_dim[0], SEEK_SET) != 0) { - return lsm_error_failed_io; - } - - LSM_RES(lsm_entry_read_attrs(handle, store->db_file)); - lsm_entry_close(handle); - - store->idx_file_size += 3 * sizeof(uint64_t) + key_len; - store->db_file_size += db_dim[1]; - } - - return lsm_error_ok; -} From 226873219bd11dbf800f44e6ca9dfa4068290247 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Wed, 8 Nov 2023 10:40:12 +0100 Subject: [PATCH 35/70] refactor(lsm): slightly clean up disk write code --- lsm/src/store/lsm_store_disk_write.c | 60 ++++++++++++---------------- 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index 8b02319..9813f40 100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -22,7 +22,18 @@ static lsm_error lsm_entry_write_str(FILE *f, lsm_str *s) { return lsm_error_ok; } -lsm_error lsm_entry_write_db(uint64_t *size, FILE *db_file, lsm_entry *entry) { +static lsm_error lsm_seek(FILE *f, uint64_t pos) { + if (fseek(f, pos, SEEK_SET) != 0) { + return lsm_error_failed_io; + } + + return lsm_error_ok; +} + +lsm_error lsm_entry_write_db(uint64_t *size, FILE *db_file, lsm_entry *entry, + uint64_t pos) { + LSM_RES(lsm_seek(db_file, pos)); + // First we write how many 
attributes follow LSM_RES(lsm_entry_write_uint64_t(db_file, entry->attrs.count)); *size = sizeof(uint64_t); @@ -42,7 +53,8 @@ lsm_error lsm_entry_write_db(uint64_t *size, FILE *db_file, lsm_entry *entry) { } lsm_error lsm_entry_write_idx(uint64_t *size, FILE *idx_file, lsm_entry *entry, - uint64_t offset, uint64_t len) { + uint64_t offset, uint64_t len, uint64_t pos) { + LSM_RES(lsm_seek(idx_file, pos)); LSM_RES(lsm_entry_write_uint64_t(idx_file, lsm_str_len(entry->key))); LSM_RES(lsm_entry_write_str(idx_file, entry->key)); LSM_RES(lsm_entry_write_uint64_t(idx_file, offset)); @@ -56,16 +68,9 @@ lsm_error lsm_entry_write_idx(uint64_t *size, FILE *idx_file, lsm_entry *entry, lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { pthread_mutex_lock(&store->db_lock); - // Append entry to end of database file - if (fseek(store->db_file, store->db_file_size, SEEK_SET) != 0) { - pthread_mutex_unlock(&store->db_lock); - - return lsm_error_failed_io; - } - uint64_t entry_size; - lsm_error res = - lsm_entry_write_db(&entry_size, store->db_file, handle->wrapper->entry); + lsm_error res = lsm_entry_write_db( + &entry_size, store->db_file, handle->wrapper->entry, store->db_file_size); fflush(store->db_file); if (res != lsm_error_ok) { @@ -82,40 +87,25 @@ lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { // Append entry to index file pthread_mutex_lock(&store->idx_lock); - if (fseek(store->idx_file, store->idx_file_size, SEEK_SET) != 0) { - printf("failed seek, %lu\n", store->idx_file_size); - pthread_mutex_unlock(&store->idx_lock); - - return lsm_error_failed_io; - } - - res = lsm_entry_write_idx(&entry_size, store->idx_file, - handle->wrapper->entry, entry_index, entry_size); + res = + lsm_entry_write_idx(&entry_size, store->idx_file, handle->wrapper->entry, + entry_index, entry_size, store->idx_file_size); if (res == lsm_error_ok) { // Update the counter at the beginning of the file + rewind(store->idx_file); + uint64_t new_block_count = 
store->idx_file_block_count + 1; - if (fseek(store->idx_file, 0, SEEK_SET) != 0) { - pthread_mutex_unlock(&store->idx_lock); + res = lsm_entry_write_uint64_t(store->idx_file, new_block_count); - return lsm_error_failed_io; + if (res == lsm_error_ok) { + store->idx_file_size += entry_size; + store->idx_file_block_count = new_block_count; } - - size_t r = fwrite(&new_block_count, sizeof(uint64_t), 1, store->idx_file); - - if (r != lsm_error_ok) { - pthread_mutex_unlock(&store->idx_lock); - - return res; - } - - store->idx_file_size += entry_size; - store->idx_file_block_count = new_block_count; } fflush(store->idx_file); - pthread_mutex_unlock(&store->idx_lock); return res; From 535b92a6b6751711edfdb9d6e4158d045f6e1559 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Wed, 8 Nov 2023 11:19:33 +0100 Subject: [PATCH 36/70] feat(lander): integrate persistent insert & get lsm store --- .gitignore | 1 + include/lander.h | 4 ++++ lsm/include/lsm/store.h | 3 ++- lsm/src/store/lsm_store_disk_write.c | 3 +++ src/lander/lander.c | 8 +++++--- src/lander/lander_get.c | 27 +++++++++++++++++---------- src/lander/lander_post.c | 24 ++++++++++++++++++++++++ src/main.c | 12 +++++++----- 8 files changed, 63 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 35f41da..6f9958c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ lander.data* pastes/ .cache/ vgcore.* +data/ diff --git a/include/lander.h b/include/lander.h index 0579a65..0ffca46 100644 --- a/include/lander.h +++ b/include/lander.h @@ -49,4 +49,8 @@ bool lander_stream_body_to_client(event_loop_conn *conn); bool lander_get_entry_lsm(event_loop_conn *conn); +bool lander_post_redirect_body_to_attr(event_loop_conn *conn); + +bool lander_entry_sync(event_loop_conn *conn); + #endif diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index 72334c6..16798f2 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -17,7 +17,8 @@ */ typedef enum lsm_attr_type : uint64_t { 
lsm_attr_type_entry_type = 1 << 0, - lsm_attr_type_content_type = 1 << 1 + lsm_attr_type_content_type = 1 << 1, + lsm_attr_type_url = 1 << 2, } lsm_attr_type; /** diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index 9813f40..07034a1 100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -100,6 +100,9 @@ lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { res = lsm_entry_write_uint64_t(store->idx_file, new_block_count); if (res == lsm_error_ok) { + // Only if we successfully updated the on-disk counter do we make the code + // aware that the file's size has increased. This way, if a write to the + // counter fails, the code will simply reuse the already written content. store->idx_file_size += entry_size; store->idx_file_block_count = new_block_count; } diff --git a/src/lander/lander.c b/src/lander/lander.c index c4c4ca7..8045f28 100644 --- a/src/lander/lander.c +++ b/src/lander/lander.c @@ -23,8 +23,9 @@ http_route lander_routes[] = { .type = http_route_regex, .method = http_post, .path = "^/s(l?)/([^/]*)$", - .steps = {http_loop_step_auth, http_loop_step_parse_content_length, - lander_post_redirect_lsm, lander_stream_body_to_entry, NULL}, + .steps = {http_loop_step_auth, lander_post_redirect_lsm, + http_loop_step_body_to_buf, lander_post_redirect_body_to_attr, + lander_entry_sync, NULL}, .steps_res = {http_loop_step_write_header, http_loop_step_write_body, NULL}, }, @@ -32,7 +33,8 @@ http_route lander_routes[] = { .method = http_post, .path = "^/p(l?)/([^/]*)$", .steps = {http_loop_step_auth, http_loop_step_parse_content_length, - lander_post_paste_lsm, lander_stream_body_to_entry, NULL}, + lander_post_paste_lsm, lander_stream_body_to_entry, + lander_entry_sync, NULL}, .steps_res = {http_loop_step_write_header, http_loop_step_write_body, NULL}}, }; diff --git a/src/lander/lander_get.c b/src/lander/lander_get.c index d2fee1e..026f449 100644 --- 
a/src/lander/lander_get.c +++ b/src/lander/lander_get.c @@ -3,6 +3,7 @@ #include "event_loop.h" #include "http/types.h" #include "lander.h" +#include "log.h" #include "lsm/store.h" static const char index_page[] = @@ -82,19 +83,25 @@ bool lander_get_entry_lsm(event_loop_conn *conn) { lsm_attr_type_entry_type); if (t == lander_entry_type_redirect) { - // Stream entire redirect data into buffer to set as header - uint64_t data_len = lsm_entry_data_len(c_ctx->entry); - char *buf = malloc(data_len + 1); - uint64_t read = 0; - uint64_t total_read = 0; + // For redirects, the URL is stored as an in-memory attribute + lsm_str *url_attr_val; - while (total_read < data_len) { - lsm_entry_data_read(&read, &buf[total_read], c_ctx->entry, - data_len - total_read); - total_read += read; + // This shouldn't be able to happen + if (lsm_entry_attr_get(&url_attr_val, c_ctx->entry, lsm_attr_type_url) != + lsm_error_ok) { + error("Entry of type redirect detected without URL attribute"); + + ctx->res.status = http_internal_server_error; + lsm_entry_close(c_ctx->entry); + c_ctx->entry = NULL; + + return true; } - buf[data_len] = '\0'; + char *buf = malloc(lsm_str_len(url_attr_val) + 1); + memcpy(buf, lsm_str_ptr(url_attr_val), lsm_str_len(url_attr_val)); + + buf[lsm_str_len(url_attr_val)] = '\0'; ctx->res.status = http_moved_permanently; http_res_add_header(&ctx->res, http_header_location, buf, true); diff --git a/src/lander/lander_post.c b/src/lander/lander_post.c index da9d1c4..b03cd53 100644 --- a/src/lander/lander_post.c +++ b/src/lander/lander_post.c @@ -138,6 +138,30 @@ bool lander_post_redirect_lsm(event_loop_conn *conn) { return true; } +bool lander_post_redirect_body_to_attr(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + lsm_str *attr_value; + lsm_str_init_copy_n(&attr_value, ctx->req.body.buf, ctx->req.body.len); + lsm_entry_attr_insert(c_ctx->entry, lsm_attr_type_url, attr_value); + + return true; +} + +bool 
lander_entry_sync(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + http_loop_gctx *gctx = ctx->g; + lander_gctx *c_gctx = gctx->c; + lander_ctx *c_ctx = ctx->c; + + if (lsm_entry_sync(c_gctx->store, c_ctx->entry) != lsm_error_ok) { + ctx->res.status = http_internal_server_error; + } + + return true; +} + bool lander_post_paste_lsm(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; lander_ctx *c_ctx = ctx->c; diff --git a/src/main.c b/src/main.c index f552081..f854f3e 100644 --- a/src/main.c +++ b/src/main.c @@ -23,7 +23,7 @@ int main() { ENV(api_key, "LANDER_API_KEY"); ENV_OPT(port_str, "LANDER_PORT", "18080"); - ENV_OPT(data_dir, "LANDER_DATA_DIR", "."); + ENV_OPT(data_dir_s, "LANDER_DATA_DIR", "."); int port = atoi(port_str); @@ -46,12 +46,14 @@ int main() { /* info("Trie initialized and populated with %i entries", trie_size(trie)); */ lander_gctx *c_gctx = lander_gctx_init(); - c_gctx->data_dir = data_dir; + c_gctx->data_dir = data_dir_s; /* c_gctx->trie = trie; */ - lsm_str *data_dir2; - lsm_str_init_copy(&data_dir2, "data"); - lsm_store_load(&c_gctx->store, data_dir2); + lsm_str *data_dir; + lsm_str_init_copy(&data_dir, (char *)data_dir_s); + if (lsm_store_load(&c_gctx->store, data_dir) != lsm_error_ok) { + critical(2, "Failed to load existing store."); + } http_loop *hl = http_loop_init( lander_routes, sizeof(lander_routes) / sizeof(lander_routes[0]), c_gctx, From b5fc3a3612520ddec4db9d62268df35ca80547ed Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Wed, 8 Nov 2023 12:25:47 +0100 Subject: [PATCH 37/70] refactor(lsm): decouple attribute types --- include/lander.h | 6 ++++++ lsm/include/lsm/store.h | 26 +++++++------------------- lsm/src/_include/lsm/store_internal.h | 2 +- lsm/src/store/lsm_store_entry.c | 14 +++++++------- src/lander/lander_get.c | 4 ++-- src/lander/lander_post.c | 6 +++--- 6 files changed, 26 insertions(+), 32 deletions(-) diff --git a/include/lander.h b/include/lander.h index 0ffca46..0697cce 100644 --- 
a/include/lander.h +++ b/include/lander.h @@ -18,6 +18,12 @@ typedef struct lander_ctx { uint64_t remaining_data; } lander_ctx; +typedef enum lander_attr_type : uint64_t { + lander_attr_type_entry_type = 1 << 0, + lander_attr_type_content_type = 1 << 1, + lander_attr_type_url = 1 << 2, +} lander_attr_type; + typedef enum lander_entry_type { lander_entry_type_redirect = 0, lander_entry_type_paste = 1, diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index 16798f2..37fdb9d 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -9,18 +9,6 @@ #define LSM_STORE_DISK_THRESHOLD 1024 -/** - * The type of an entry attribute. - * - * Each type is represented as a single bit of an - * integer, so they can be easily combined into a bitmap. - */ -typedef enum lsm_attr_type : uint64_t { - lsm_attr_type_entry_type = 1 << 0, - lsm_attr_type_content_type = 1 << 1, - lsm_attr_type_url = 1 << 2, -} lsm_attr_type; - /** * A handle referencing an entry inside a store. Read/write operations from/to * the entry go through this handle. 
@@ -33,7 +21,7 @@ typedef struct lsm_entry_handle lsm_entry_handle; * @param entry entry to check * @param type type of attribute to check for */ -bool lsm_entry_attr_present(lsm_entry_handle *handle, lsm_attr_type type); +bool lsm_entry_attr_present(lsm_entry_handle *handle, uint64_t type); /** * Retrieve the contents of an attribute from an entry, if present @@ -43,7 +31,7 @@ bool lsm_entry_attr_present(lsm_entry_handle *handle, lsm_attr_type type); * @param type type of attribute to return */ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, - lsm_attr_type type); + uint64_t type); /** * Convenience wrapper around `lsm_entry_attr_get` that can be used if we know @@ -54,7 +42,7 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, * @param type type of attribute to return */ lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, - lsm_attr_type type); + uint64_t type); /** * Add a new attribute to the entry. @@ -63,7 +51,7 @@ lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, * @param type type of attribute to add * @param data data of attribute; ownership of pointer is taken over */ -lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, lsm_attr_type type, +lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint64_t type, lsm_str *data); /** @@ -74,8 +62,8 @@ lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, lsm_attr_type type, * @param type type of attribute to add * @param data data of attribute */ -lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, - lsm_attr_type type, uint64_t data); +lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint64_t type, + uint64_t data); /** * Remove an atribute from the given entry, if present. 
@@ -86,7 +74,7 @@ lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, * @param type type of attribute to remove */ lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, - lsm_attr_type type); + uint64_t type); /** * A store consisting of LSM entries. diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index b9546e9..f426d34 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -12,7 +12,7 @@ #define LSM_IDX_FILE_NAME "lsm.idx" typedef struct lsm_attr { - lsm_attr_type type; + uint64_t type; lsm_str *str; } lsm_attr; diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index 2878996..ea150f0 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -56,12 +56,12 @@ void lsm_entry_close(lsm_entry_handle *handle) { free(handle); } -bool lsm_entry_attr_present(lsm_entry_handle *handle, lsm_attr_type type) { +bool lsm_entry_attr_present(lsm_entry_handle *handle, uint64_t type) { return (handle->wrapper->entry->attrs.bitmap & type) != 0; } lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, - lsm_attr_type type) { + uint64_t type) { if (!lsm_entry_attr_present(handle, type)) { return lsm_error_not_found; } @@ -79,7 +79,7 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, } lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, - lsm_attr_type type) { + uint64_t type) { lsm_str *s; LSM_RES(lsm_entry_attr_get(&s, handle, type)); @@ -96,7 +96,7 @@ lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, } lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, - lsm_attr_type type) { + uint64_t type) { if (!lsm_entry_attr_present(handle, type)) { return lsm_error_not_found; } @@ -143,7 +143,7 @@ lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, return lsm_error_ok; } -lsm_error 
lsm_entry_attr_insert(lsm_entry_handle *handle, lsm_attr_type type, +lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint64_t type, lsm_str *data) { if (lsm_entry_attr_present(handle, type)) { return lsm_error_already_present; @@ -168,8 +168,8 @@ lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, lsm_attr_type type, return lsm_error_ok; } -lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, - lsm_attr_type type, uint64_t data) { +lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint64_t type, + uint64_t data) { lsm_str *s; LSM_RES( lsm_str_init_copy_n(&s, (char *)&data, sizeof(uint64_t) / sizeof(char))); diff --git a/src/lander/lander_get.c b/src/lander/lander_get.c index 026f449..b063e29 100644 --- a/src/lander/lander_get.c +++ b/src/lander/lander_get.c @@ -80,14 +80,14 @@ bool lander_get_entry_lsm(event_loop_conn *conn) { lander_entry_type t; lsm_entry_attr_get_num((uint64_t *)&t, c_ctx->entry, - lsm_attr_type_entry_type); + lander_attr_type_entry_type); if (t == lander_entry_type_redirect) { // For redirects, the URL is stored as an in-memory attribute lsm_str *url_attr_val; // This shouldn't be able to happen - if (lsm_entry_attr_get(&url_attr_val, c_ctx->entry, lsm_attr_type_url) != + if (lsm_entry_attr_get(&url_attr_val, c_ctx->entry, lander_attr_type_url) != lsm_error_ok) { error("Entry of type redirect detected without URL attribute"); diff --git a/src/lander/lander_post.c b/src/lander/lander_post.c index b03cd53..05dc662 100644 --- a/src/lander/lander_post.c +++ b/src/lander/lander_post.c @@ -132,7 +132,7 @@ bool lander_post_redirect_lsm(event_loop_conn *conn) { return true; } - lsm_entry_attr_insert_num(c_ctx->entry, lsm_attr_type_entry_type, + lsm_entry_attr_insert_num(c_ctx->entry, lander_attr_type_entry_type, lander_entry_type_redirect); return true; @@ -144,7 +144,7 @@ bool lander_post_redirect_body_to_attr(event_loop_conn *conn) { lsm_str *attr_value; lsm_str_init_copy_n(&attr_value, ctx->req.body.buf, 
ctx->req.body.len); - lsm_entry_attr_insert(c_ctx->entry, lsm_attr_type_url, attr_value); + lsm_entry_attr_insert(c_ctx->entry, lander_attr_type_url, attr_value); return true; } @@ -171,7 +171,7 @@ bool lander_post_paste_lsm(event_loop_conn *conn) { return true; } - lsm_entry_attr_insert_num(c_ctx->entry, lsm_attr_type_entry_type, + lsm_entry_attr_insert_num(c_ctx->entry, lander_attr_type_entry_type, lander_entry_type_paste); return true; From 35c301955f50db7a3d76adb4a425a7bc86803289 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Wed, 8 Nov 2023 13:42:46 +0100 Subject: [PATCH 38/70] refactor(lsm): switch to uint8_t attr types; refactor disk code --- include/lander.h | 8 +- lsm/include/lsm/store.h | 12 +-- lsm/src/_include/lsm/store_internal.h | 6 +- lsm/src/store/lsm_store_disk_read.c | 112 +++++++++++--------------- lsm/src/store/lsm_store_disk_write.c | 21 +++-- lsm/src/store/lsm_store_entry.c | 21 ++--- 6 files changed, 84 insertions(+), 96 deletions(-) diff --git a/include/lander.h b/include/lander.h index 0697cce..5abea75 100644 --- a/include/lander.h +++ b/include/lander.h @@ -18,10 +18,10 @@ typedef struct lander_ctx { uint64_t remaining_data; } lander_ctx; -typedef enum lander_attr_type : uint64_t { - lander_attr_type_entry_type = 1 << 0, - lander_attr_type_content_type = 1 << 1, - lander_attr_type_url = 1 << 2, +typedef enum lander_attr_type : uint8_t { + lander_attr_type_entry_type = 0, + lander_attr_type_content_type = 1, + lander_attr_type_url = 2, } lander_attr_type; typedef enum lander_entry_type { diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index 37fdb9d..cf0c9f7 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -21,7 +21,7 @@ typedef struct lsm_entry_handle lsm_entry_handle; * @param entry entry to check * @param type type of attribute to check for */ -bool lsm_entry_attr_present(lsm_entry_handle *handle, uint64_t type); +bool lsm_entry_attr_present(lsm_entry_handle *handle, uint8_t type); /** * Retrieve 
the contents of an attribute from an entry, if present @@ -31,7 +31,7 @@ bool lsm_entry_attr_present(lsm_entry_handle *handle, uint64_t type); * @param type type of attribute to return */ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, - uint64_t type); + uint8_t type); /** * Convenience wrapper around `lsm_entry_attr_get` that can be used if we know @@ -42,7 +42,7 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, * @param type type of attribute to return */ lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, - uint64_t type); + uint8_t type); /** * Add a new attribute to the entry. @@ -51,7 +51,7 @@ lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, * @param type type of attribute to add * @param data data of attribute; ownership of pointer is taken over */ -lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint64_t type, +lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint8_t type, lsm_str *data); /** @@ -62,7 +62,7 @@ lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint64_t type, * @param type type of attribute to add * @param data data of attribute */ -lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint64_t type, +lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint8_t type, uint64_t data); /** @@ -74,7 +74,7 @@ lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint64_t type, * @param type type of attribute to remove */ lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, - uint64_t type); + uint8_t type); /** * A store consisting of LSM entries. 
diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index f426d34..b8c24dc 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -12,7 +12,7 @@ #define LSM_IDX_FILE_NAME "lsm.idx" typedef struct lsm_attr { - uint64_t type; + uint8_t type; lsm_str *str; } lsm_attr; @@ -26,8 +26,8 @@ typedef struct lsm_attr { typedef struct lsm_entry { lsm_str *key; struct { - uint64_t count; - uint64_t bitmap; + uint64_t bitmap[4]; + uint8_t count; lsm_attr *items; } attrs; struct { diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c index 4c76b76..e644846 100644 --- a/lsm/src/store/lsm_store_disk_read.c +++ b/lsm/src/store/lsm_store_disk_read.c @@ -1,6 +1,7 @@ #include #include +#include "lsm.h" #include "lsm/store_internal.h" lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { @@ -79,48 +80,59 @@ lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { return lsm_error_ok; } -static lsm_error lsm_entry_read_attrs(lsm_entry_handle *handle, FILE *db_file) { - uint64_t attr_count; - size_t res = fread(&attr_count, sizeof(uint64_t), 1, db_file); +static lsm_error lsm_fread(void *out, uint64_t *sum, FILE *f, uint64_t size, + uint64_t count) { + size_t res = fread(out, size, count, f); - if (res == 0) { + if (res < count) { return lsm_error_failed_io; } + *sum += size * count; + + return lsm_error_ok; +} + +static lsm_error lsm_entry_read_str(lsm_str **out, uint64_t *sum, FILE *f) { + uint64_t len; + LSM_RES(lsm_fread(&len, sum, f, sizeof(uint64_t), 1)); + + char *buf = malloc(len + 1); + + if (buf == NULL) { + return lsm_error_failed_alloc; + } + + uint64_t read = 0; + + while (read < len) { + read += fread(&buf[read], 1, len - read, f); + } + + *sum += len; + + return lsm_str_init(out, buf); +} + +static lsm_error lsm_entry_read_attrs(uint64_t *sum, lsm_entry_handle *handle, + FILE *db_file) { + uint8_t attr_count; + LSM_RES(lsm_fread(&attr_count, 
sum, db_file, sizeof(uint8_t), 1)); + // attr_type, val_len - uint64_t nums[2]; + uint8_t attr_type; lsm_str *val; for (uint64_t i = 0; i < attr_count; i++) { - res = fread(nums, sizeof(uint64_t), 2, db_file); - - if (res < 2) { - return lsm_error_failed_io; - } - - char *val_s = malloc(nums[1] + 1); - val_s[nums[1]] = '\0'; - - if (val_s == NULL) { - return lsm_error_failed_alloc; - } - - uint64_t read = 0; - - while (read < nums[1]) { - read += fread(&val_s[read], 1, nums[1] - read, db_file); - } - - LSM_RES(lsm_str_init(&val, val_s)); - ; - lsm_entry_attr_insert(handle, nums[0], val); + LSM_RES(lsm_fread(&attr_type, sum, db_file, sizeof(uint8_t), 1)); + LSM_RES(lsm_entry_read_str(&val, sum, db_file)); + lsm_entry_attr_insert(handle, attr_type, val); } return lsm_error_ok; } lsm_error lsm_store_load_db(lsm_store *store) { - uint64_t key_len; uint64_t db_dim[2]; lsm_str *key; lsm_entry_handle *handle; @@ -128,43 +140,13 @@ lsm_error lsm_store_load_db(lsm_store *store) { rewind(store->idx_file); // idx file starts with block count - size_t res = - fread(&store->idx_file_block_count, sizeof(uint64_t), 1, store->idx_file); - - if (res == 0) { - return lsm_error_failed_io; - } - - store->idx_file_size += sizeof(uint64_t); + LSM_RES(lsm_fread(&store->idx_file_block_count, &store->idx_file_size, + store->idx_file, sizeof(uint64_t), 1)); for (uint64_t i = 0; i < store->idx_file_block_count; i++) { - // Read in idx metadata - res = fread(&key_len, sizeof(uint64_t), 1, store->idx_file); - - if (res == 0) { - return lsm_error_failed_io; - } - - char *key_s = malloc(key_len + 1); - key_s[key_len] = '\0'; - - if (key_s == NULL) { - return lsm_error_failed_alloc; - } - - res = fread(key_s, 1, key_len, store->idx_file); - - if (res < key_len) { - return lsm_error_failed_io; - } - - res = fread(db_dim, sizeof(uint64_t), 2, store->idx_file); - - if (res < 2) { - return lsm_error_failed_io; - } - - LSM_RES(lsm_str_init(&key, key_s)); + LSM_RES(lsm_entry_read_str(&key, 
&store->idx_file_size, store->idx_file)); + LSM_RES(lsm_fread(&db_dim, &store->idx_file_size, store->idx_file, + sizeof(uint64_t), 2)); LSM_RES(lsm_store_insert(&handle, store, key)); // Read attributes from database file @@ -172,10 +154,10 @@ lsm_error lsm_store_load_db(lsm_store *store) { return lsm_error_failed_io; } - LSM_RES(lsm_entry_read_attrs(handle, store->db_file)); + LSM_RES( + lsm_entry_read_attrs(&store->idx_file_size, handle, store->db_file)); lsm_entry_close(handle); - store->idx_file_size += 3 * sizeof(uint64_t) + key_len; store->db_file_size += db_dim[1]; } diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index 07034a1..4bdd297 100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -1,9 +1,8 @@ #include "lsm/store_internal.h" -static lsm_error lsm_entry_write_uint64_t(FILE *f, uint64_t num) { - size_t res = fwrite(&num, sizeof(uint64_t), 1, f); +static lsm_error lsm_entry_write_single(FILE *f, uint64_t size, void *val) { + size_t res = fwrite(val, size, 1, f); - // Such a small write should succeed in one go if (res == 0) { return lsm_error_failed_io; } @@ -11,6 +10,10 @@ static lsm_error lsm_entry_write_uint64_t(FILE *f, uint64_t num) { return lsm_error_ok; } +static lsm_error lsm_entry_write_uint64_t(FILE *f, uint64_t num) { + return lsm_entry_write_single(f, sizeof(uint64_t), &num); +} + static lsm_error lsm_entry_write_str(FILE *f, lsm_str *s) { uint64_t to_write = lsm_str_len(s); uint64_t written = 0; @@ -35,17 +38,19 @@ lsm_error lsm_entry_write_db(uint64_t *size, FILE *db_file, lsm_entry *entry, LSM_RES(lsm_seek(db_file, pos)); // First we write how many attributes follow - LSM_RES(lsm_entry_write_uint64_t(db_file, entry->attrs.count)); - *size = sizeof(uint64_t); + LSM_RES( + lsm_entry_write_single(db_file, sizeof(uint8_t), &entry->attrs.count)); + *size = sizeof(uint8_t); - for (uint64_t i = 0; i < entry->attrs.count; i++) { + for (uint8_t i = 0; i < 
entry->attrs.count; i++) { // Write attribute type, length & value - LSM_RES(lsm_entry_write_uint64_t(db_file, entry->attrs.items[i].type)); + LSM_RES(lsm_entry_write_single(db_file, sizeof(uint8_t), + &entry->attrs.items[i].type)); LSM_RES(lsm_entry_write_uint64_t(db_file, lsm_str_len(entry->attrs.items[i].str))); LSM_RES(lsm_entry_write_str(db_file, entry->attrs.items[i].str)); - *size += 2 * sizeof(uint64_t) + + *size += sizeof(uint8_t) + sizeof(uint64_t) + lsm_str_len(entry->attrs.items[i].str) * sizeof(char); } diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index ea150f0..51dd998 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -56,12 +56,13 @@ void lsm_entry_close(lsm_entry_handle *handle) { free(handle); } -bool lsm_entry_attr_present(lsm_entry_handle *handle, uint64_t type) { - return (handle->wrapper->entry->attrs.bitmap & type) != 0; +bool lsm_entry_attr_present(lsm_entry_handle *handle, uint8_t type) { + return (handle->wrapper->entry->attrs.bitmap[type / 64] & + (((uint64_t)1) << (type % 64))) != 0; } lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, - uint64_t type) { + uint8_t type) { if (!lsm_entry_attr_present(handle, type)) { return lsm_error_not_found; } @@ -79,7 +80,7 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, } lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, - uint64_t type) { + uint8_t type) { lsm_str *s; LSM_RES(lsm_entry_attr_get(&s, handle, type)); @@ -96,7 +97,7 @@ lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, } lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, - uint64_t type) { + uint8_t type) { if (!lsm_entry_attr_present(handle, type)) { return lsm_error_not_found; } @@ -109,7 +110,7 @@ lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, free(entry->attrs.items); entry->attrs.items = NULL; entry->attrs.count = 0; - 
entry->attrs.bitmap = 0; + entry->attrs.bitmap[type / 64] &= ~(((uint64_t)1) << (type % 64)); return lsm_error_ok; } @@ -138,12 +139,12 @@ lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, entry->attrs.items = new_attrs; entry->attrs.count--; - entry->attrs.bitmap &= ~type; + entry->attrs.bitmap[type / 64] &= ~(((uint64_t)1) << (type % 64)); return lsm_error_ok; } -lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint64_t type, +lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint8_t type, lsm_str *data) { if (lsm_entry_attr_present(handle, type)) { return lsm_error_already_present; @@ -163,12 +164,12 @@ lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint64_t type, entry->attrs.items = new_attrs; entry->attrs.count++; - entry->attrs.bitmap |= type; + entry->attrs.bitmap[type / 64] |= ((uint64_t)1) << (type % 64); return lsm_error_ok; } -lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint64_t type, +lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint8_t type, uint64_t data) { lsm_str *s; LSM_RES( From ef33825b7b404f2295383de2126c811df08b6e8a Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Wed, 8 Nov 2023 14:11:46 +0100 Subject: [PATCH 39/70] feat(lsm): always store entry data on disk --- lsm/include/lsm/store.h | 2 +- lsm/src/_include/lsm/store_internal.h | 9 +- lsm/src/store/lsm_store.c | 157 +++++++++++--------------- lsm/src/store/lsm_store_disk_read.c | 11 +- lsm/src/store/lsm_store_entry.c | 2 +- src/lander/lander_get.c | 6 +- 6 files changed, 77 insertions(+), 110 deletions(-) diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index cf0c9f7..7de2946 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -181,7 +181,7 @@ lsm_error lsm_entry_data_append_raw(lsm_store *store, lsm_entry_handle *handle, * @param handle entry handle to read from * @param len how many bytes to read at most */ -lsm_error lsm_entry_data_read(uint64_t *out, char *buf, 
+lsm_error lsm_entry_data_read(uint64_t *out, char *buf, lsm_store *store, lsm_entry_handle *handle, uint64_t len); /** diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index b8c24dc..e4bbdba 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -30,14 +30,7 @@ typedef struct lsm_entry { uint8_t count; lsm_attr *items; } attrs; - struct { - uint64_t len; - union { - FILE *file; - char *ptr; - } value; - bool on_disk; - } data; + uint64_t data_len; } lsm_entry; /** diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 2345cb8..c6df34e 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -60,22 +60,22 @@ lsm_error lsm_store_open_read(lsm_entry_handle **out, lsm_store *store, return res; } - // Open a new file descriptor if needed - if (entry->data.on_disk) { - char path[store->data_path->len + entry->key->len + 2]; - sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), - lsm_str_ptr(entry->key)); + /* // Open a new file descriptor if needed */ + /* if (entry->data_len > 0) { */ + /* char path[store->data_path->len + entry->key->len + 2]; */ + /* sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), */ + /* lsm_str_ptr(entry->key)); */ - FILE *f = fopen(path, "rb"); + /* FILE *f = fopen(path, "rb"); */ - if (f == NULL) { - free(handle); + /* if (f == NULL) { */ + /* free(handle); */ - return lsm_error_failed_io; - } + /* return lsm_error_failed_io; */ + /* } */ - handle->f = f; - } + /* handle->f = f; */ + /* } */ handle->wrapper = wrapper; *out = handle; @@ -114,22 +114,22 @@ lsm_error lsm_store_open_write(lsm_entry_handle **out, lsm_store *store, return res; } - // Open a new file descriptor if needed - if (entry->data.on_disk) { - char path[store->data_path->len + entry->key->len + 2]; - sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), - lsm_str_ptr(entry->key)); + /* // Open a new file descriptor if needed */ + /* if (entry->data_len 
> 0) { */ + /* char path[store->data_path->len + entry->key->len + 2]; */ + /* sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), */ + /* lsm_str_ptr(entry->key)); */ - FILE *f = fopen(path, "ab"); + /* FILE *f = fopen(path, "ab"); */ - if (f == NULL) { - free(handle); + /* if (f == NULL) { */ + /* free(handle); */ - return lsm_error_failed_io; - } + /* return lsm_error_failed_io; */ + /* } */ - handle->f = f; - } + /* handle->f = f; */ + /* } */ handle->wrapper = wrapper; *out = handle; @@ -192,99 +192,68 @@ lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, lsm_entry *entry = handle->wrapper->entry; - uint64_t new_len = entry->data.len + lsm_str_len(data); + uint64_t new_len = entry->data_len + lsm_str_len(data); const char *data_s = lsm_str_ptr(data); - // Data is in memory and still fits -> keep it in memory - if ((new_len <= LSM_STORE_DISK_THRESHOLD) && (!entry->data.on_disk)) { - char *buf; + // Entries don't open their file unless needed + if (handle->f == NULL) { + char path[store->data_path->len + entry->key->len + 2]; + sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), + lsm_str_ptr(entry->key)); - // Entries with no data do not have an allocated buffer yet - if (entry->data.len == 0) { - buf = malloc(new_len * sizeof(char)); - } else { - buf = realloc(entry->data.value.ptr, new_len * sizeof(char)); + FILE *f = fopen(path, "ab"); + + if (f == NULL) { + return lsm_error_failed_io; } - if (buf == NULL) { - return lsm_error_failed_alloc; - } - - memcpy(&buf[entry->data.len], data_s, lsm_str_len(data)); - entry->data.value.ptr = buf; - } - // Data will end up on disk - else { - // Data is not yet on disk, so we create the file - if (!entry->data.on_disk) { - char path[store->data_path->len + entry->key->len + 2]; - sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), - lsm_str_ptr(entry->key)); - - FILE *f = fopen(path, "ab"); - - if (f == NULL) { - return lsm_error_failed_io; - } - - // If there was data present in memory 
already, we sync this to disk. - // This check is required because it's possible that more than the - // treshold is written to an empty entry immediately, meaning there's no - // allocated memory buffer present. - if (entry->data.len > 0) { - size_t written = 0; - - // Write original in-memory data to file - while (written < entry->data.len) { - written += fwrite(&entry->data.value.ptr[written], sizeof(char), - entry->data.len - written, f); - } - - free(entry->data.value.ptr); - entry->data.value.ptr = NULL; - } - - handle->f = f; - entry->data.on_disk = true; - } - - size_t written = 0; - - // TODO what happens when I/O fails? - while (written < data->len) { - written += fwrite(&data_s[written], sizeof(char), data->len - written, - handle->f); - } + handle->f = f; } - entry->data.len = new_len; + size_t written = 0; + + // TODO what happens when I/O fails? + while (written < data->len) { + written += + fwrite(&data_s[written], sizeof(char), data->len - written, handle->f); + } + + entry->data_len = new_len; return lsm_error_ok; } -lsm_error lsm_entry_data_read(uint64_t *out, char *buf, +lsm_error lsm_entry_data_read(uint64_t *out, char *buf, lsm_store *store, lsm_entry_handle *handle, uint64_t len) { lsm_entry *entry = handle->wrapper->entry; - if (entry->data.len == 0) { + if (entry->data_len == 0) { *out = 0; return lsm_error_ok; } - uint64_t read; + // Entries don't open their file unless needed + if (handle->f == NULL) { + char path[store->data_path->len + entry->key->len + 2]; + sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), + lsm_str_ptr(entry->key)); - if (entry->data.on_disk) { - read = fread(buf, sizeof(char), len, handle->f); + FILE *f = fopen(path, "rb"); - if ((read == 0) && (ferror(handle->f) != 0)) { + if (f == NULL) { return lsm_error_failed_io; } - } else { - read = (entry->data.len - handle->pos) < len - ? 
(entry->data.len - handle->pos) - : len; - memcpy(buf, &entry->data.value.ptr[handle->pos], read * sizeof(char)); + + handle->f = f; + } + + uint64_t read; + + read = fread(buf, sizeof(char), len, handle->f); + + if ((read == 0) && (ferror(handle->f) != 0)) { + return lsm_error_failed_io; } handle->pos += read; diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c index e644846..2b11a30 100644 --- a/lsm/src/store/lsm_store_disk_read.c +++ b/lsm/src/store/lsm_store_disk_read.c @@ -88,7 +88,9 @@ static lsm_error lsm_fread(void *out, uint64_t *sum, FILE *f, uint64_t size, return lsm_error_failed_io; } - *sum += size * count; + if (sum != NULL) { + *sum += size * count; + } return lsm_error_ok; } @@ -109,7 +111,9 @@ static lsm_error lsm_entry_read_str(lsm_str **out, uint64_t *sum, FILE *f) { read += fread(&buf[read], 1, len - read, f); } - *sum += len; + if (sum != NULL) { + *sum += len; + } return lsm_str_init(out, buf); } @@ -154,8 +158,7 @@ lsm_error lsm_store_load_db(lsm_store *store) { return lsm_error_failed_io; } - LSM_RES( - lsm_entry_read_attrs(&store->idx_file_size, handle, store->db_file)); + LSM_RES(lsm_entry_read_attrs(NULL, handle, store->db_file)); lsm_entry_close(handle); store->db_file_size += db_dim[1]; diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index 51dd998..f34d633 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -179,5 +179,5 @@ lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint8_t type, } uint64_t lsm_entry_data_len(lsm_entry_handle *handle) { - return handle->wrapper->entry->data.len; + return handle->wrapper->entry->data_len; } diff --git a/src/lander/lander_get.c b/src/lander/lander_get.c index b063e29..cfb466e 100644 --- a/src/lander/lander_get.c +++ b/src/lander/lander_get.c @@ -121,6 +121,8 @@ bool lander_get_entry_lsm(event_loop_conn *conn) { bool lander_stream_body_to_client(event_loop_conn *conn) { http_loop_ctx *ctx 
= conn->ctx; lander_ctx *c_ctx = ctx->c; + http_loop_gctx *gctx = ctx->g; + lander_gctx *c_gctx = gctx->c; if ((c_ctx->entry == NULL) || (ctx->res.body.expected_len == ctx->res.body.len)) { @@ -131,8 +133,8 @@ bool lander_stream_body_to_client(event_loop_conn *conn) { ctx->res.body.expected_len - ctx->res.body.len); uint64_t read = 0; - lsm_entry_data_read(&read, (char *)&conn->wbuf[conn->wbuf_size], c_ctx->entry, - to_write); + lsm_entry_data_read(&read, (char *)&conn->wbuf[conn->wbuf_size], + c_gctx->store, c_ctx->entry, to_write); ctx->res.body.len += read; conn->wbuf_size += read; From 9c03a36aa2abb5b09b34fb665bb8c042512544f5 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Wed, 8 Nov 2023 16:04:21 +0100 Subject: [PATCH 40/70] fix(lsm): store data len in db; fix bug --- lsm/include/lsm/store.h | 8 ++++++++ lsm/src/store/lsm_store.c | 2 ++ lsm/src/store/lsm_store_disk_read.c | 4 ++++ lsm/src/store/lsm_store_disk_write.c | 5 +++-- src/main.c | 9 ++++++++- 5 files changed, 25 insertions(+), 3 deletions(-) diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index 7de2946..bad5484 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -91,6 +91,14 @@ typedef struct lsm_store lsm_store; */ lsm_error lsm_store_init(lsm_store **ptr); +/** + * Return how many elements are stored in the trie. + * + * @param store store to use + * @return how many elements are in the store + */ +uint64_t lsm_store_size(lsm_store *store); + /** * Open the given database file and load it into a new store object. 
* diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index c6df34e..3ac2232 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -30,6 +30,8 @@ lsm_error lsm_store_init(lsm_store **ptr) { return lsm_error_ok; } +uint64_t lsm_store_size(lsm_store *store) { return lsm_trie_size(store->trie); } + lsm_error lsm_store_open_read(lsm_entry_handle **out, lsm_store *store, lsm_str *key) { lsm_entry_wrapper *wrapper; diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c index 2b11a30..fc4d748 100644 --- a/lsm/src/store/lsm_store_disk_read.c +++ b/lsm/src/store/lsm_store_disk_read.c @@ -105,6 +105,8 @@ static lsm_error lsm_entry_read_str(lsm_str **out, uint64_t *sum, FILE *f) { return lsm_error_failed_alloc; } + buf[len] = '\0'; + uint64_t read = 0; while (read < len) { @@ -158,6 +160,8 @@ lsm_error lsm_store_load_db(lsm_store *store) { return lsm_error_failed_io; } + LSM_RES(lsm_fread(&handle->wrapper->entry->data_len, NULL, store->db_file, + sizeof(uint64_t), 1)); LSM_RES(lsm_entry_read_attrs(NULL, handle, store->db_file)); lsm_entry_close(handle); diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index 4bdd297..eb60c22 100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -37,10 +37,11 @@ lsm_error lsm_entry_write_db(uint64_t *size, FILE *db_file, lsm_entry *entry, uint64_t pos) { LSM_RES(lsm_seek(db_file, pos)); - // First we write how many attributes follow + LSM_RES(lsm_entry_write_uint64_t(db_file, entry->data_len)); + LSM_RES( lsm_entry_write_single(db_file, sizeof(uint8_t), &entry->attrs.count)); - *size = sizeof(uint8_t); + *size = sizeof(uint64_t) + sizeof(uint8_t); for (uint8_t i = 0; i < entry->attrs.count; i++) { // Write attribute type, length & value diff --git a/src/main.c b/src/main.c index f854f3e..bf0dd9a 100644 --- a/src/main.c +++ b/src/main.c @@ -1,4 +1,6 @@ #include +#include +#include #include 
"lander.h" #include "log.h" @@ -20,6 +22,7 @@ int main() { setvbuf(stdout, NULL, _IONBF, 0); + srand(time(NULL)); ENV(api_key, "LANDER_API_KEY"); ENV_OPT(port_str, "LANDER_PORT", "18080"); @@ -47,14 +50,18 @@ int main() { lander_gctx *c_gctx = lander_gctx_init(); c_gctx->data_dir = data_dir_s; - /* c_gctx->trie = trie; */ lsm_str *data_dir; lsm_str_init_copy(&data_dir, (char *)data_dir_s); + + info("Initializing store from path '%s'", data_dir_s); + if (lsm_store_load(&c_gctx->store, data_dir) != lsm_error_ok) { critical(2, "Failed to load existing store."); } + info("Store loaded containing %lu entries.", lsm_store_size(c_gctx->store)); + http_loop *hl = http_loop_init( lander_routes, sizeof(lander_routes) / sizeof(lander_routes[0]), c_gctx, lander_ctx_init, (void (*)(void *))lander_ctx_reset, From 0efcdece48a4f5bfec7cb7c233a818a4e6a3817e Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Wed, 8 Nov 2023 18:17:58 +0100 Subject: [PATCH 41/70] feat(lander): store entry type as single byte --- lsm/include/lsm/store.h | 30 +++++++++++++++++++++++++---- lsm/src/store/lsm_store_entry.c | 28 +++++++++++++++++++++++---- src/lander/lander_get.c | 34 +++------------------------------ src/lander/lander_post.c | 8 ++++---- 4 files changed, 57 insertions(+), 43 deletions(-) diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index bad5484..d49bbdf 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -41,8 +41,19 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, * @param entry entry to search for * @param type type of attribute to return */ -lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, - uint8_t type); +lsm_error lsm_entry_attr_get_uint64_t(uint64_t *out, lsm_entry_handle *handle, + uint8_t type); + +/** + * Convenience wrapper around `lsm_entry_attr_get` that can be used if we know + * beforehand the attribute value is an 8-bit number. 
+ * + * @param out where to store attribute data + * @param entry entry to search for + * @param type type of attribute to return + */ +lsm_error lsm_entry_attr_get_uint8_t(uint8_t *out, lsm_entry_handle *handle, + uint8_t type); /** * Add a new attribute to the entry. @@ -62,8 +73,19 @@ lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint8_t type, * @param type type of attribute to add * @param data data of attribute */ -lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint8_t type, - uint64_t data); +lsm_error lsm_entry_attr_insert_uint64_t(lsm_entry_handle *handle, uint8_t type, + uint64_t data); + +/** + * Convenience wrapper around `lsm_entry_attr_insert` that can be used if the + * data to be stored is an 8-bit number. + * + * @param entry entry to modify + * @param type type of attribute to add + * @param data data of attribute + */ +lsm_error lsm_entry_attr_insert_uint8_t(lsm_entry_handle *handle, uint8_t type, + uint8_t data); /** * Remove an atribute from the given entry, if present. 
diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index f34d633..58eba29 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -79,8 +79,8 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, return lsm_error_ok; } -lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, - uint8_t type) { +lsm_error lsm_entry_attr_get_uint64_t(uint64_t *out, lsm_entry_handle *handle, + uint8_t type) { lsm_str *s; LSM_RES(lsm_entry_attr_get(&s, handle, type)); @@ -96,6 +96,17 @@ lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, return lsm_error_ok; } +lsm_error lsm_entry_attr_get_uint8_t(uint8_t *out, lsm_entry_handle *handle, + uint8_t type) { + lsm_str *s; + + LSM_RES(lsm_entry_attr_get(&s, handle, type)); + + *out = lsm_str_char(s, 0); + + return lsm_error_ok; +} + lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, uint8_t type) { if (!lsm_entry_attr_present(handle, type)) { @@ -169,8 +180,8 @@ lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint8_t type, return lsm_error_ok; } -lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint8_t type, - uint64_t data) { +lsm_error lsm_entry_attr_insert_uint64_t(lsm_entry_handle *handle, uint8_t type, + uint64_t data) { lsm_str *s; LSM_RES( lsm_str_init_copy_n(&s, (char *)&data, sizeof(uint64_t) / sizeof(char))); @@ -178,6 +189,15 @@ lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint8_t type, return lsm_entry_attr_insert(handle, type, s); } +lsm_error lsm_entry_attr_insert_uint8_t(lsm_entry_handle *handle, uint8_t type, + uint8_t data) { + lsm_str *s; + LSM_RES( + lsm_str_init_copy_n(&s, (char *)&data, sizeof(uint8_t) / sizeof(char))); + + return lsm_entry_attr_insert(handle, type, s); +} + uint64_t lsm_entry_data_len(lsm_entry_handle *handle) { return handle->wrapper->entry->data_len; } diff --git a/src/lander/lander_get.c b/src/lander/lander_get.c index 
cfb466e..5d1be5f 100644 --- a/src/lander/lander_get.c +++ b/src/lander/lander_get.c @@ -26,35 +26,6 @@ bool lander_get_index(event_loop_conn *conn) { return true; } -bool lander_get_entry(event_loop_conn *conn) { - http_loop_ctx *ctx = conn->ctx; - lander_gctx *c_gctx = ctx->g->c; - - const char *key = &ctx->req.path[ctx->req.regex_groups[1].rm_so]; - int key_len = ctx->req.regex_groups[1].rm_eo - ctx->req.regex_groups[1].rm_so; - - Entry *entry; - TrieExitCode res = trie_search_len(c_gctx->trie, &entry, key, key_len); - - if (res == NotFound) { - ctx->res.status = http_not_found; - } else if (entry->type == Redirect) { - ctx->res.status = http_moved_permanently; - http_res_add_header(&ctx->res, http_header_location, entry->string, false); - } else if (entry->type == Paste) { - char fname[strlen(c_gctx->data_dir) + 8 + key_len + 1]; - sprintf(fname, "%s/pastes/%.*s", c_gctx->data_dir, key_len, key); - - http_res_set_body_file(&ctx->res, fname); - // TODO don't call everything a text file - http_res_set_mime_type(&ctx->res, http_mime_txt); - } - - conn->state = event_loop_conn_state_res; - - return true; -} - bool lander_get_entry_lsm(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; lander_ctx *c_ctx = ctx->c; @@ -79,8 +50,8 @@ bool lander_get_entry_lsm(event_loop_conn *conn) { } lander_entry_type t; - lsm_entry_attr_get_num((uint64_t *)&t, c_ctx->entry, - lander_attr_type_entry_type); + lsm_entry_attr_get_uint8_t((uint8_t *)&t, c_ctx->entry, + lander_attr_type_entry_type); if (t == lander_entry_type_redirect) { // For redirects, the URL is stored as an in-memory attribute @@ -113,6 +84,7 @@ bool lander_get_entry_lsm(event_loop_conn *conn) { c_ctx->entry = NULL; } else { ctx->res.body.expected_len = lsm_entry_data_len(c_ctx->entry); + http_res_set_mime_type(&ctx->res, http_mime_txt); } return true; diff --git a/src/lander/lander_post.c b/src/lander/lander_post.c index 05dc662..ee1a9cb 100644 --- a/src/lander/lander_post.c +++ b/src/lander/lander_post.c @@ 
-132,8 +132,8 @@ bool lander_post_redirect_lsm(event_loop_conn *conn) { return true; } - lsm_entry_attr_insert_num(c_ctx->entry, lander_attr_type_entry_type, - lander_entry_type_redirect); + lsm_entry_attr_insert_uint8_t(c_ctx->entry, lander_attr_type_entry_type, + lander_entry_type_redirect); return true; } @@ -171,8 +171,8 @@ bool lander_post_paste_lsm(event_loop_conn *conn) { return true; } - lsm_entry_attr_insert_num(c_ctx->entry, lander_attr_type_entry_type, - lander_entry_type_paste); + lsm_entry_attr_insert_uint8_t(c_ctx->entry, lander_attr_type_entry_type, + lander_entry_type_paste); return true; } From 4fb127d9db494ace36021f0223910f755db4768f Mon Sep 17 00:00:00 2001 From: Jef Roosens Date: Wed, 8 Nov 2023 21:59:47 +0100 Subject: [PATCH 42/70] chore: update changelog --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3872f31..993ae07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Users can now define custom global & request-local contexts * Introduced "response steps", allowing custom code during the response part of a request +* LSM - Lander Storage Module + * Rewrite of trie codebase + * Introduced a custom data store using an in-memory trie as index and a + custom binary on-disk format + * Support for lookup & insert +* Lander + * Replaced old trie implementation with LSM store ## [0.1.0](https://git.rustybever.be/Chewing_Bever/lander/src/tag/0.1.0) From 715e1f9a5880cd84d371437b212ff6f0bdaa2f8a Mon Sep 17 00:00:00 2001 From: Jef Roosens Date: Thu, 9 Nov 2023 21:07:51 +0100 Subject: [PATCH 43/70] refactor(lsm): clean up disk write code --- include/lander.h | 2 +- lsm/src/store/lsm_store_disk_write.c | 88 +++++++++++++++------------- 2 files changed, 47 insertions(+), 43 deletions(-) diff --git a/include/lander.h b/include/lander.h index 5abea75..3c812c1 100644 --- a/include/lander.h +++ 
b/include/lander.h @@ -24,7 +24,7 @@ typedef enum lander_attr_type : uint8_t { lander_attr_type_url = 2, } lander_attr_type; -typedef enum lander_entry_type { +typedef enum lander_entry_type : uint8_t { lander_entry_type_redirect = 0, lander_entry_type_paste = 1, } lander_entry_type; diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index eb60c22..b79a78b 100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -1,31 +1,39 @@ #include "lsm/store_internal.h" -static lsm_error lsm_entry_write_single(FILE *f, uint64_t size, void *val) { - size_t res = fwrite(val, size, 1, f); +static lsm_error lsm_fwrite(uint64_t *sum, FILE *f, uint64_t size, + uint64_t count, void *val) { + size_t res = fwrite(val, size, count, f); - if (res == 0) { + if (res < count) { return lsm_error_failed_io; } + if (sum != NULL) { + *sum += size * count; + } + return lsm_error_ok; } -static lsm_error lsm_entry_write_uint64_t(FILE *f, uint64_t num) { - return lsm_entry_write_single(f, sizeof(uint64_t), &num); -} +static lsm_error lsm_write_str(uint64_t *sum, FILE *f, lsm_str *s) { + uint64_t len = lsm_str_len(s); + + LSM_RES(lsm_fwrite(sum, f, sizeof(uint64_t), 1, &len)); -static lsm_error lsm_entry_write_str(FILE *f, lsm_str *s) { - uint64_t to_write = lsm_str_len(s); uint64_t written = 0; do { - written += fwrite(lsm_str_ptr(s), sizeof(char), to_write - written, f); - } while (written < to_write); + written += fwrite(lsm_str_ptr(s), sizeof(char), len - written, f); + } while (written < len); + + if (sum != NULL) { + *sum += len * sizeof(char); + } return lsm_error_ok; } -static lsm_error lsm_seek(FILE *f, uint64_t pos) { +static lsm_error lsm_fseek(FILE *f, uint64_t pos) { if (fseek(f, pos, SEEK_SET) != 0) { return lsm_error_failed_io; } @@ -35,24 +43,17 @@ static lsm_error lsm_seek(FILE *f, uint64_t pos) { lsm_error lsm_entry_write_db(uint64_t *size, FILE *db_file, lsm_entry *entry, uint64_t pos) { - 
LSM_RES(lsm_seek(db_file, pos)); + *size = 0; - LSM_RES(lsm_entry_write_uint64_t(db_file, entry->data_len)); + LSM_RES(lsm_fseek(db_file, pos)); - LSM_RES( - lsm_entry_write_single(db_file, sizeof(uint8_t), &entry->attrs.count)); - *size = sizeof(uint64_t) + sizeof(uint8_t); + LSM_RES(lsm_fwrite(size, db_file, sizeof(uint64_t), 1, &entry->data_len)); + LSM_RES(lsm_fwrite(size, db_file, sizeof(uint8_t), 1, &entry->attrs.count)); for (uint8_t i = 0; i < entry->attrs.count; i++) { - // Write attribute type, length & value - LSM_RES(lsm_entry_write_single(db_file, sizeof(uint8_t), - &entry->attrs.items[i].type)); - LSM_RES(lsm_entry_write_uint64_t(db_file, - lsm_str_len(entry->attrs.items[i].str))); - LSM_RES(lsm_entry_write_str(db_file, entry->attrs.items[i].str)); - - *size += sizeof(uint8_t) + sizeof(uint64_t) + - lsm_str_len(entry->attrs.items[i].str) * sizeof(char); + LSM_RES(lsm_fwrite(size, db_file, sizeof(uint8_t), 1, + &entry->attrs.items[i].type)); + LSM_RES(lsm_write_str(size, db_file, entry->attrs.items[i].str)); } return lsm_error_ok; @@ -60,13 +61,13 @@ lsm_error lsm_entry_write_db(uint64_t *size, FILE *db_file, lsm_entry *entry, lsm_error lsm_entry_write_idx(uint64_t *size, FILE *idx_file, lsm_entry *entry, uint64_t offset, uint64_t len, uint64_t pos) { - LSM_RES(lsm_seek(idx_file, pos)); - LSM_RES(lsm_entry_write_uint64_t(idx_file, lsm_str_len(entry->key))); - LSM_RES(lsm_entry_write_str(idx_file, entry->key)); - LSM_RES(lsm_entry_write_uint64_t(idx_file, offset)); - LSM_RES(lsm_entry_write_uint64_t(idx_file, len)); + *size = 0; - *size = 3 * sizeof(uint64_t) + lsm_str_len(entry->key) * sizeof(char); + LSM_RES(lsm_fseek(idx_file, pos)); + + LSM_RES(lsm_write_str(size, idx_file, entry->key)); + LSM_RES(lsm_fwrite(size, idx_file, sizeof(uint64_t), 1, &offset)); + LSM_RES(lsm_fwrite(size, idx_file, sizeof(uint64_t), 1, &len)); return lsm_error_ok; } @@ -74,9 +75,10 @@ lsm_error lsm_entry_write_idx(uint64_t *size, FILE *idx_file, lsm_entry *entry, lsm_error 
lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { pthread_mutex_lock(&store->db_lock); - uint64_t entry_size; - lsm_error res = lsm_entry_write_db( - &entry_size, store->db_file, handle->wrapper->entry, store->db_file_size); + uint64_t db_entry_size; + lsm_error res = + lsm_entry_write_db(&db_entry_size, store->db_file, handle->wrapper->entry, + store->db_file_size); fflush(store->db_file); if (res != lsm_error_ok) { @@ -85,17 +87,17 @@ lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { return res; } - uint64_t entry_index = store->db_file_size; - store->db_file_size += entry_size; + uint64_t db_entry_index = store->db_file_size; pthread_mutex_unlock(&store->db_lock); // Append entry to index file pthread_mutex_lock(&store->idx_lock); - res = - lsm_entry_write_idx(&entry_size, store->idx_file, handle->wrapper->entry, - entry_index, entry_size, store->idx_file_size); + uint64_t idx_entry_size; + res = lsm_entry_write_idx(&idx_entry_size, store->idx_file, + handle->wrapper->entry, db_entry_index, + db_entry_size, store->idx_file_size); if (res == lsm_error_ok) { // Update the counter at the beginning of the file @@ -103,14 +105,16 @@ lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { uint64_t new_block_count = store->idx_file_block_count + 1; - res = lsm_entry_write_uint64_t(store->idx_file, new_block_count); + res = lsm_fwrite(NULL, store->idx_file, sizeof(uint64_t), 1, + &new_block_count); if (res == lsm_error_ok) { // Only if we successfully updated the on-disk counter do we make the code - // aware that the file's size has increased. This way, if a write to the + // aware that the files' sizes have increased. This way, if a write to the // counter fails, the code will simply reuse the already written content. 
- store->idx_file_size += entry_size; + store->idx_file_size += idx_entry_size; store->idx_file_block_count = new_block_count; + store->db_file_size += db_entry_size; } } From 2f58d1ee489d25730272b940c680f5a63c9343bd Mon Sep 17 00:00:00 2001 From: Jef Roosens Date: Thu, 9 Nov 2023 21:32:39 +0100 Subject: [PATCH 44/70] feat(lsm): track entry idx file offset --- lsm/src/_include/lsm/store_internal.h | 1 + lsm/src/store/lsm_store_disk_read.c | 4 ++++ lsm/src/store/lsm_store_disk_write.c | 22 ++++++++++++---------- lsm/src/trie/lsm_trie.c | 7 ------- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index e4bbdba..6bd7b00 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -31,6 +31,7 @@ typedef struct lsm_entry { lsm_attr *items; } attrs; uint64_t data_len; + uint64_t idx_file_offset; } lsm_entry; /** diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c index fc4d748..17b91d7 100644 --- a/lsm/src/store/lsm_store_disk_read.c +++ b/lsm/src/store/lsm_store_disk_read.c @@ -150,6 +150,8 @@ lsm_error lsm_store_load_db(lsm_store *store) { store->idx_file, sizeof(uint64_t), 1)); for (uint64_t i = 0; i < store->idx_file_block_count; i++) { + uint64_t idx_file_offset = store->idx_file_size; + LSM_RES(lsm_entry_read_str(&key, &store->idx_file_size, store->idx_file)); LSM_RES(lsm_fread(&db_dim, &store->idx_file_size, store->idx_file, sizeof(uint64_t), 2)); @@ -163,6 +165,8 @@ lsm_error lsm_store_load_db(lsm_store *store) { LSM_RES(lsm_fread(&handle->wrapper->entry->data_len, NULL, store->db_file, sizeof(uint64_t), 1)); LSM_RES(lsm_entry_read_attrs(NULL, handle, store->db_file)); + + handle->wrapper->entry->idx_file_offset = idx_file_offset; lsm_entry_close(handle); store->db_file_size += db_dim[1]; diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index b79a78b..3482f53 
100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -41,7 +41,7 @@ static lsm_error lsm_fseek(FILE *f, uint64_t pos) { return lsm_error_ok; } -lsm_error lsm_entry_write_db(uint64_t *size, FILE *db_file, lsm_entry *entry, +lsm_error lsm_write_db_entry(uint64_t *size, FILE *db_file, lsm_entry *entry, uint64_t pos) { *size = 0; @@ -59,7 +59,7 @@ lsm_error lsm_entry_write_db(uint64_t *size, FILE *db_file, lsm_entry *entry, return lsm_error_ok; } -lsm_error lsm_entry_write_idx(uint64_t *size, FILE *idx_file, lsm_entry *entry, +lsm_error lsm_write_idx_entry(uint64_t *size, FILE *idx_file, lsm_entry *entry, uint64_t offset, uint64_t len, uint64_t pos) { *size = 0; @@ -75,27 +75,27 @@ lsm_error lsm_entry_write_idx(uint64_t *size, FILE *idx_file, lsm_entry *entry, lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { pthread_mutex_lock(&store->db_lock); + uint64_t db_entry_index = store->db_file_size; + uint64_t db_entry_size; lsm_error res = - lsm_entry_write_db(&db_entry_size, store->db_file, handle->wrapper->entry, + lsm_write_db_entry(&db_entry_size, store->db_file, handle->wrapper->entry, store->db_file_size); fflush(store->db_file); - if (res != lsm_error_ok) { - pthread_mutex_unlock(&store->db_lock); + pthread_mutex_unlock(&store->db_lock); + if (res != lsm_error_ok) { return res; } - uint64_t db_entry_index = store->db_file_size; - - pthread_mutex_unlock(&store->db_lock); - // Append entry to index file pthread_mutex_lock(&store->idx_lock); + uint64_t idx_entry_index = store->idx_file_size; + uint64_t idx_entry_size; - res = lsm_entry_write_idx(&idx_entry_size, store->idx_file, + res = lsm_write_idx_entry(&idx_entry_size, store->idx_file, handle->wrapper->entry, db_entry_index, db_entry_size, store->idx_file_size); @@ -115,6 +115,8 @@ lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { store->idx_file_size += idx_entry_size; store->idx_file_block_count = new_block_count; 
store->db_file_size += db_entry_size; + + handle->wrapper->entry->idx_file_offset = idx_entry_index; } } diff --git a/lsm/src/trie/lsm_trie.c b/lsm/src/trie/lsm_trie.c index 8744b4e..0e5b548 100644 --- a/lsm/src/trie/lsm_trie.c +++ b/lsm/src/trie/lsm_trie.c @@ -252,19 +252,12 @@ lsm_error lsm_trie_remove(void **data, lsm_trie *trie, lsm_str *key) { return lsm_error_not_found; } - // Child is the node we wish to delete if (data != NULL) { *data = child->data; } child->data = NULL; - // We only remove child if it has no children of its own - if (lsm_bt_size(&child->bt) == 0) { - lsm_bt_remove(NULL, &parent->bt, c); - lsm_trie_node_free(child); - } - trie->size--; return lsm_error_ok; From d4b21fb84d611b3b73fef5b92d3b6adbebed8767 Mon Sep 17 00:00:00 2001 From: Jef Roosens Date: Thu, 9 Nov 2023 21:48:15 +0100 Subject: [PATCH 45/70] feat(lsm): add valid entry marker to idx entries --- lsm/src/store/lsm_store_disk_read.c | 46 ++++++++++++++++++++-------- lsm/src/store/lsm_store_disk_write.c | 3 ++ 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c index 17b91d7..8ce53c1 100644 --- a/lsm/src/store/lsm_store_disk_read.c +++ b/lsm/src/store/lsm_store_disk_read.c @@ -142,6 +142,7 @@ lsm_error lsm_store_load_db(lsm_store *store) { uint64_t db_dim[2]; lsm_str *key; lsm_entry_handle *handle; + bool valid_entry; rewind(store->idx_file); @@ -152,24 +153,43 @@ lsm_error lsm_store_load_db(lsm_store *store) { for (uint64_t i = 0; i < store->idx_file_block_count; i++) { uint64_t idx_file_offset = store->idx_file_size; - LSM_RES(lsm_entry_read_str(&key, &store->idx_file_size, store->idx_file)); - LSM_RES(lsm_fread(&db_dim, &store->idx_file_size, store->idx_file, - sizeof(uint64_t), 2)); - LSM_RES(lsm_store_insert(&handle, store, key)); + LSM_RES(lsm_fread(&valid_entry, &store->idx_file_size, store->idx_file, + sizeof(bool), 1)); - // Read attributes from database file - if (fseek(store->db_file, 
db_dim[0], SEEK_SET) != 0) { - return lsm_error_failed_io; + if (valid_entry) { + LSM_RES(lsm_entry_read_str(&key, &store->idx_file_size, store->idx_file)); + LSM_RES(lsm_fread(&db_dim, &store->idx_file_size, store->idx_file, + sizeof(uint64_t), 2)); + LSM_RES(lsm_store_insert(&handle, store, key)); + + // Read attributes from database file + if (fseek(store->db_file, db_dim[0], SEEK_SET) != 0) { + return lsm_error_failed_io; + } + + LSM_RES(lsm_fread(&handle->wrapper->entry->data_len, NULL, store->db_file, + sizeof(uint64_t), 1)); + LSM_RES(lsm_entry_read_attrs(NULL, handle, store->db_file)); + + handle->wrapper->entry->idx_file_offset = idx_file_offset; + lsm_entry_close(handle); + + store->db_file_size += db_dim[1]; } + // Simply skip the invalid entry + else { + uint64_t key_len; + LSM_RES(lsm_fread(&key_len, &store->idx_file_size, store->idx_file, + sizeof(uint64_t), 1)); - LSM_RES(lsm_fread(&handle->wrapper->entry->data_len, NULL, store->db_file, - sizeof(uint64_t), 1)); - LSM_RES(lsm_entry_read_attrs(NULL, handle, store->db_file)); + uint64_t remaining = key_len + 2 * sizeof(uint64_t); - handle->wrapper->entry->idx_file_offset = idx_file_offset; - lsm_entry_close(handle); + if (fseek(store->idx_file, remaining, SEEK_CUR) != 0) { + return lsm_error_failed_io; + } - store->db_file_size += db_dim[1]; + store->idx_file_size += remaining; + } } return lsm_error_ok; diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index 3482f53..3c9293d 100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -65,6 +65,9 @@ lsm_error lsm_write_idx_entry(uint64_t *size, FILE *idx_file, lsm_entry *entry, LSM_RES(lsm_fseek(idx_file, pos)); + bool valid_entry_marker = true; + LSM_RES(lsm_fwrite(size, idx_file, sizeof(bool), 1, &valid_entry_marker)); + LSM_RES(lsm_write_str(size, idx_file, entry->key)); LSM_RES(lsm_fwrite(size, idx_file, sizeof(uint64_t), 1, &offset)); LSM_RES(lsm_fwrite(size, idx_file, 
sizeof(uint64_t), 1, &len)); From eb0ce16f78b1a554ef8cdb521c86fece4ad8a052 Mon Sep 17 00:00:00 2001 From: Jef Roosens Date: Thu, 9 Nov 2023 22:05:20 +0100 Subject: [PATCH 46/70] feat(lsm): store pointer to store in entry handle --- lsm/include/lsm/store.h | 11 +++--- lsm/src/_include/lsm/store_internal.h | 1 + lsm/src/store/lsm_store.c | 50 +++++---------------------- lsm/src/store/lsm_store_disk_read.c | 2 +- lsm/src/store/lsm_store_disk_write.c | 4 ++- src/lander/lander_get.c | 6 ++-- src/lander/lander_post.c | 8 ++--- 7 files changed, 23 insertions(+), 59 deletions(-) diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index d49bbdf..c7d46d7 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -188,8 +188,7 @@ lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, * @param entry entry to append data to * @param data data to append */ -lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, - lsm_str *data); +lsm_error lsm_entry_data_append(lsm_entry_handle *handle, lsm_str *data); /** * Same as `lsm_entry_data_append`, except that it takes a direct char array. @@ -199,8 +198,8 @@ lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, * @param data data to append * @param len length of data array */ -lsm_error lsm_entry_data_append_raw(lsm_store *store, lsm_entry_handle *handle, - char *data, uint64_t len); +lsm_error lsm_entry_data_append_raw(lsm_entry_handle *handle, char *data, + uint64_t len); /** * Read a number of bytes from the entry's data field. 
The position from which @@ -211,7 +210,7 @@ lsm_error lsm_entry_data_append_raw(lsm_store *store, lsm_entry_handle *handle, * @param handle entry handle to read from * @param len how many bytes to read at most */ -lsm_error lsm_entry_data_read(uint64_t *out, char *buf, lsm_store *store, +lsm_error lsm_entry_data_read(uint64_t *out, char *buf, lsm_entry_handle *handle, uint64_t len); /** @@ -220,7 +219,7 @@ lsm_error lsm_entry_data_read(uint64_t *out, char *buf, lsm_store *store, * @param store store to persist entry in * @param handle handle to entry to persist */ -lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle); +lsm_error lsm_entry_sync(lsm_entry_handle *handle); /** * Return the length of the entry's data. diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index 6bd7b00..2a5856c 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -58,6 +58,7 @@ void lsm_entry_wrapper_free(lsm_entry_wrapper *wrapper); struct lsm_entry_handle { lsm_entry_wrapper *wrapper; + lsm_store *store; FILE *f; uint64_t pos; }; diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 3ac2232..2da7c51 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -62,24 +62,8 @@ lsm_error lsm_store_open_read(lsm_entry_handle **out, lsm_store *store, return res; } - /* // Open a new file descriptor if needed */ - /* if (entry->data_len > 0) { */ - /* char path[store->data_path->len + entry->key->len + 2]; */ - /* sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), */ - /* lsm_str_ptr(entry->key)); */ - - /* FILE *f = fopen(path, "rb"); */ - - /* if (f == NULL) { */ - /* free(handle); */ - - /* return lsm_error_failed_io; */ - /* } */ - - /* handle->f = f; */ - /* } */ - handle->wrapper = wrapper; + handle->store = store; *out = handle; return lsm_error_ok; @@ -116,24 +100,8 @@ lsm_error lsm_store_open_write(lsm_entry_handle **out, lsm_store *store, return res; 
} - /* // Open a new file descriptor if needed */ - /* if (entry->data_len > 0) { */ - /* char path[store->data_path->len + entry->key->len + 2]; */ - /* sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), */ - /* lsm_str_ptr(entry->key)); */ - - /* FILE *f = fopen(path, "ab"); */ - - /* if (f == NULL) { */ - /* free(handle); */ - - /* return lsm_error_failed_io; */ - /* } */ - - /* handle->f = f; */ - /* } */ - handle->wrapper = wrapper; + handle->store = store; *out = handle; return lsm_error_ok; @@ -180,14 +148,14 @@ lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, // No need to set the handle's file, as the entry doesn't have any data yet handle->wrapper = wrapper; + handle->store = store; *out = handle; return lsm_error_ok; } -lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, - lsm_str *data) { +lsm_error lsm_entry_data_append(lsm_entry_handle *handle, lsm_str *data) { if (lsm_str_len(data) == 0) { return lsm_error_ok; } @@ -199,8 +167,8 @@ lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, // Entries don't open their file unless needed if (handle->f == NULL) { - char path[store->data_path->len + entry->key->len + 2]; - sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), + char path[handle->store->data_path->len + entry->key->len + 2]; + sprintf(path, "%s/%s", lsm_str_ptr(handle->store->data_path), lsm_str_ptr(entry->key)); FILE *f = fopen(path, "ab"); @@ -225,7 +193,7 @@ lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle, return lsm_error_ok; } -lsm_error lsm_entry_data_read(uint64_t *out, char *buf, lsm_store *store, +lsm_error lsm_entry_data_read(uint64_t *out, char *buf, lsm_entry_handle *handle, uint64_t len) { lsm_entry *entry = handle->wrapper->entry; @@ -237,8 +205,8 @@ lsm_error lsm_entry_data_read(uint64_t *out, char *buf, lsm_store *store, // Entries don't open their file unless needed if (handle->f == NULL) { - char path[store->data_path->len + 
entry->key->len + 2]; - sprintf(path, "%s/%s", lsm_str_ptr(store->data_path), + char path[handle->store->data_path->len + entry->key->len + 2]; + sprintf(path, "%s/%s", lsm_str_ptr(handle->store->data_path), lsm_str_ptr(entry->key)); FILE *f = fopen(path, "rb"); diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c index 8ce53c1..cba7a7b 100644 --- a/lsm/src/store/lsm_store_disk_read.c +++ b/lsm/src/store/lsm_store_disk_read.c @@ -132,7 +132,7 @@ static lsm_error lsm_entry_read_attrs(uint64_t *sum, lsm_entry_handle *handle, for (uint64_t i = 0; i < attr_count; i++) { LSM_RES(lsm_fread(&attr_type, sum, db_file, sizeof(uint8_t), 1)); LSM_RES(lsm_entry_read_str(&val, sum, db_file)); - lsm_entry_attr_insert(handle, attr_type, val); + LSM_RES(lsm_entry_attr_insert(handle, attr_type, val)); } return lsm_error_ok; diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index 3c9293d..4a3de1b 100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -75,7 +75,9 @@ lsm_error lsm_write_idx_entry(uint64_t *size, FILE *idx_file, lsm_entry *entry, return lsm_error_ok; } -lsm_error lsm_entry_sync(lsm_store *store, lsm_entry_handle *handle) { +lsm_error lsm_entry_sync(lsm_entry_handle *handle) { + lsm_store *store = handle->store; + pthread_mutex_lock(&store->db_lock); uint64_t db_entry_index = store->db_file_size; diff --git a/src/lander/lander_get.c b/src/lander/lander_get.c index 5d1be5f..7c467b5 100644 --- a/src/lander/lander_get.c +++ b/src/lander/lander_get.c @@ -93,8 +93,6 @@ bool lander_get_entry_lsm(event_loop_conn *conn) { bool lander_stream_body_to_client(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; lander_ctx *c_ctx = ctx->c; - http_loop_gctx *gctx = ctx->g; - lander_gctx *c_gctx = gctx->c; if ((c_ctx->entry == NULL) || (ctx->res.body.expected_len == ctx->res.body.len)) { @@ -105,8 +103,8 @@ bool lander_stream_body_to_client(event_loop_conn *conn) { 
ctx->res.body.expected_len - ctx->res.body.len); uint64_t read = 0; - lsm_entry_data_read(&read, (char *)&conn->wbuf[conn->wbuf_size], - c_gctx->store, c_ctx->entry, to_write); + lsm_entry_data_read(&read, (char *)&conn->wbuf[conn->wbuf_size], c_ctx->entry, + to_write); ctx->res.body.len += read; conn->wbuf_size += read; diff --git a/src/lander/lander_post.c b/src/lander/lander_post.c index ee1a9cb..3f8f758 100644 --- a/src/lander/lander_post.c +++ b/src/lander/lander_post.c @@ -151,11 +151,9 @@ bool lander_post_redirect_body_to_attr(event_loop_conn *conn) { bool lander_entry_sync(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; - http_loop_gctx *gctx = ctx->g; - lander_gctx *c_gctx = gctx->c; lander_ctx *c_ctx = ctx->c; - if (lsm_entry_sync(c_gctx->store, c_ctx->entry) != lsm_error_ok) { + if (lsm_entry_sync(c_ctx->entry) != lsm_error_ok) { ctx->res.status = http_internal_server_error; } @@ -180,8 +178,6 @@ bool lander_post_paste_lsm(event_loop_conn *conn) { bool lander_stream_body_to_entry(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; lander_ctx *c_ctx = ctx->c; - http_loop_gctx *gctx = ctx->g; - lander_gctx *c_gctx = gctx->c; uint64_t to_append = MIN(conn->rbuf_size - conn->rbuf_read, @@ -189,7 +185,7 @@ bool lander_stream_body_to_entry(event_loop_conn *conn) { lsm_str *data; lsm_str_init_copy_n(&data, (char *)&conn->rbuf[conn->rbuf_read], to_append); - lsm_entry_data_append(c_gctx->store, c_ctx->entry, data); + lsm_entry_data_append(c_ctx->entry, data); conn->rbuf_read += to_append; From 9b223d04a0226ae4234d39d0778ccee4d2dcf688 Mon Sep 17 00:00:00 2001 From: Jef Roosens Date: Thu, 9 Nov 2023 22:40:06 +0100 Subject: [PATCH 47/70] feat(lsm): sync database when closing handle --- include/lander.h | 2 -- lsm/include/lsm/store.h | 8 -------- lsm/src/_include/lsm/store_internal.h | 21 +++++++++++++++++++++ lsm/src/store/lsm_store_disk_read.c | 2 +- lsm/src/store/lsm_store_disk_write.c | 2 +- lsm/src/store/lsm_store_entry.c | 10 ++++++++-- 
src/lander/lander.c | 5 ++--- src/lander/lander_post.c | 11 ----------- 8 files changed, 33 insertions(+), 28 deletions(-) diff --git a/include/lander.h b/include/lander.h index 3c812c1..88bfab9 100644 --- a/include/lander.h +++ b/include/lander.h @@ -57,6 +57,4 @@ bool lander_get_entry_lsm(event_loop_conn *conn); bool lander_post_redirect_body_to_attr(event_loop_conn *conn); -bool lander_entry_sync(event_loop_conn *conn); - #endif diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index c7d46d7..9410746 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -213,14 +213,6 @@ lsm_error lsm_entry_data_append_raw(lsm_entry_handle *handle, char *data, lsm_error lsm_entry_data_read(uint64_t *out, char *buf, lsm_entry_handle *handle, uint64_t len); -/** - * Persist the entry's data to disk. - * - * @param store store to persist entry in - * @param handle handle to entry to persist - */ -lsm_error lsm_entry_sync(lsm_entry_handle *handle); - /** * Return the length of the entry's data. * diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index 2a5856c..cebb41b 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -88,4 +88,25 @@ struct lsm_store { */ lsm_error lsm_store_load_db(lsm_store *store); +/** + * Close & free the handle without updating the database + * + * @param handle handle to close + */ +void lsm_entry_close_no_disk(lsm_entry_handle *handle); + +/** + * Write a new insert to the database. 
+ * + * @param handle handle to added entry + */ +lsm_error lsm_entry_disk_insert(lsm_entry_handle *handle); + +/** + * Remove an entry from the database + * + * @param handle handle to the removed entry + */ +lsm_error lsm_entry_disk_remove(lsm_entry_handle *handle); + #endif diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c index cba7a7b..040708a 100644 --- a/lsm/src/store/lsm_store_disk_read.c +++ b/lsm/src/store/lsm_store_disk_read.c @@ -172,7 +172,7 @@ lsm_error lsm_store_load_db(lsm_store *store) { LSM_RES(lsm_entry_read_attrs(NULL, handle, store->db_file)); handle->wrapper->entry->idx_file_offset = idx_file_offset; - lsm_entry_close(handle); + lsm_entry_close_no_disk(handle); store->db_file_size += db_dim[1]; } diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index 4a3de1b..ffe182f 100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -75,7 +75,7 @@ lsm_error lsm_write_idx_entry(uint64_t *size, FILE *idx_file, lsm_entry *entry, return lsm_error_ok; } -lsm_error lsm_entry_sync(lsm_entry_handle *handle) { +lsm_error lsm_entry_disk_insert(lsm_entry_handle *handle) { lsm_store *store = handle->store; pthread_mutex_lock(&store->db_lock); diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index 58eba29..45ead55 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -47,13 +47,19 @@ lsm_error lsm_entry_handle_init(lsm_entry_handle **out) { return lsm_error_ok; } +void lsm_entry_close_no_disk(lsm_entry_handle *handle) { + pthread_rwlock_unlock(&handle->wrapper->lock); + free(handle); +} + void lsm_entry_close(lsm_entry_handle *handle) { if (handle->f != NULL) { fclose(handle->f); } - pthread_rwlock_unlock(&handle->wrapper->lock); - free(handle); + // TODO handle errors here + lsm_entry_disk_insert(handle); + lsm_entry_close_no_disk(handle); } bool lsm_entry_attr_present(lsm_entry_handle 
*handle, uint8_t type) { diff --git a/src/lander/lander.c b/src/lander/lander.c index 8045f28..57f5c5e 100644 --- a/src/lander/lander.c +++ b/src/lander/lander.c @@ -25,7 +25,7 @@ http_route lander_routes[] = { .path = "^/s(l?)/([^/]*)$", .steps = {http_loop_step_auth, lander_post_redirect_lsm, http_loop_step_body_to_buf, lander_post_redirect_body_to_attr, - lander_entry_sync, NULL}, + NULL}, .steps_res = {http_loop_step_write_header, http_loop_step_write_body, NULL}, }, @@ -33,8 +33,7 @@ http_route lander_routes[] = { .method = http_post, .path = "^/p(l?)/([^/]*)$", .steps = {http_loop_step_auth, http_loop_step_parse_content_length, - lander_post_paste_lsm, lander_stream_body_to_entry, - lander_entry_sync, NULL}, + lander_post_paste_lsm, lander_stream_body_to_entry, NULL}, .steps_res = {http_loop_step_write_header, http_loop_step_write_body, NULL}}, }; diff --git a/src/lander/lander_post.c b/src/lander/lander_post.c index 3f8f758..429ea39 100644 --- a/src/lander/lander_post.c +++ b/src/lander/lander_post.c @@ -149,17 +149,6 @@ bool lander_post_redirect_body_to_attr(event_loop_conn *conn) { return true; } -bool lander_entry_sync(event_loop_conn *conn) { - http_loop_ctx *ctx = conn->ctx; - lander_ctx *c_ctx = ctx->c; - - if (lsm_entry_sync(c_ctx->entry) != lsm_error_ok) { - ctx->res.status = http_internal_server_error; - } - - return true; -} - bool lander_post_paste_lsm(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; lander_ctx *c_ctx = ctx->c; From ddc38452bea958b4ceeb00ff51550973fab35d8e Mon Sep 17 00:00:00 2001 From: Jef Roosens Date: Fri, 10 Nov 2023 11:34:55 +0100 Subject: [PATCH 48/70] feat(lsm): don't sync non-dirty entries --- lsm/src/_include/lsm/store_internal.h | 11 ++++------- lsm/src/store/lsm_store.c | 4 ++++ lsm/src/store/lsm_store_disk_read.c | 6 +++++- lsm/src/store/lsm_store_entry.c | 17 ++++++++++------- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/lsm/src/_include/lsm/store_internal.h 
b/lsm/src/_include/lsm/store_internal.h index cebb41b..ccafe99 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -59,8 +59,12 @@ void lsm_entry_wrapper_free(lsm_entry_wrapper *wrapper); struct lsm_entry_handle { lsm_entry_wrapper *wrapper; lsm_store *store; + // Either read or append, depending on how it was opened FILE *f; + // Current position in the file pointer uint64_t pos; + // Whether the entry's metadata has changed + bool dirty; }; lsm_error lsm_entry_handle_init(lsm_entry_handle **out); @@ -88,13 +92,6 @@ struct lsm_store { */ lsm_error lsm_store_load_db(lsm_store *store); -/** - * Close & free the handle without updating the database - * - * @param handle handle to close - */ -void lsm_entry_close_no_disk(lsm_entry_handle *handle); - /** * Write a new insert to the database. * diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 2da7c51..022e68b 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -150,6 +150,9 @@ lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, handle->wrapper = wrapper; handle->store = store; + // Newly inserted entries are always dirty + handle->dirty = true; + *out = handle; return lsm_error_ok; @@ -189,6 +192,7 @@ lsm_error lsm_entry_data_append(lsm_entry_handle *handle, lsm_str *data) { } entry->data_len = new_len; + handle->dirty = true; return lsm_error_ok; } diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c index 040708a..5c71dee 100644 --- a/lsm/src/store/lsm_store_disk_read.c +++ b/lsm/src/store/lsm_store_disk_read.c @@ -172,7 +172,11 @@ lsm_error lsm_store_load_db(lsm_store *store) { LSM_RES(lsm_entry_read_attrs(NULL, handle, store->db_file)); handle->wrapper->entry->idx_file_offset = idx_file_offset; - lsm_entry_close_no_disk(handle); + + // We explicitely set the dirty flag here to prevent writing to the datase + // when reading it in + handle->dirty = false; + 
lsm_entry_close(handle); store->db_file_size += db_dim[1]; } diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index 45ead55..4623f36 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -47,19 +47,18 @@ lsm_error lsm_entry_handle_init(lsm_entry_handle **out) { return lsm_error_ok; } -void lsm_entry_close_no_disk(lsm_entry_handle *handle) { - pthread_rwlock_unlock(&handle->wrapper->lock); - free(handle); -} - void lsm_entry_close(lsm_entry_handle *handle) { if (handle->f != NULL) { fclose(handle->f); } // TODO handle errors here - lsm_entry_disk_insert(handle); - lsm_entry_close_no_disk(handle); + if (handle->dirty) { + lsm_entry_disk_insert(handle); + } + + pthread_rwlock_unlock(&handle->wrapper->lock); + free(handle); } bool lsm_entry_attr_present(lsm_entry_handle *handle, uint8_t type) { @@ -158,6 +157,8 @@ lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, entry->attrs.count--; entry->attrs.bitmap[type / 64] &= ~(((uint64_t)1) << (type % 64)); + handle->dirty = true; + return lsm_error_ok; } @@ -183,6 +184,8 @@ lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint8_t type, entry->attrs.count++; entry->attrs.bitmap[type / 64] |= ((uint64_t)1) << (type % 64); + handle->dirty = true; + return lsm_error_ok; } From 418de748f0523b394f00f3de7e6de45c8c6b8301 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Fri, 10 Nov 2023 16:10:20 +0100 Subject: [PATCH 49/70] feat(lsm): pave the way for removals and updates --- lsm/include/lsm/store.h | 2 -- lsm/src/_include/lsm/store_internal.h | 24 ++++++++++++++++++------ lsm/src/store/lsm_store.c | 4 ++-- lsm/src/store/lsm_store_disk_read.c | 4 +--- lsm/src/store/lsm_store_entry.c | 12 +++++++++--- 5 files changed, 30 insertions(+), 16 deletions(-) diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index 9410746..31eb19b 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -7,8 +7,6 @@ #include "lsm.h" 
#include "lsm/str.h" -#define LSM_STORE_DISK_THRESHOLD 1024 - /** * A handle referencing an entry inside a store. Read/write operations from/to * the entry go through this handle. diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index ccafe99..bb35772 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -19,9 +19,8 @@ typedef struct lsm_attr { /** * An entry inside an LSM store. * - * Each entry consists of the key it's stored behind, zero or more attributes - * (metadata) and a data field. The data field can be stored on disk or - * in-memory, depending on the size. + * Each entry consists of the key it's stored behind, zero or more attributes + * (metadata) and a data file. */ typedef struct lsm_entry { lsm_str *key; @@ -56,6 +55,12 @@ typedef struct lsm_entry_wrapper { lsm_error lsm_entry_wrapper_init(lsm_entry_wrapper **ptr); void lsm_entry_wrapper_free(lsm_entry_wrapper *wrapper); +typedef enum lsm_entry_handle_state : uint8_t { + lsm_entry_handle_state_new = 1 << 0, + lsm_entry_handle_state_updated = 1 << 1, + lsm_entry_handle_state_removed = 1 << 2, +} lsm_entry_handle_state; + struct lsm_entry_handle { lsm_entry_wrapper *wrapper; lsm_store *store; @@ -63,8 +68,8 @@ struct lsm_entry_handle { FILE *f; // Current position in the file pointer uint64_t pos; - // Whether the entry's metadata has changed - bool dirty; + // Required to determine in what way the database files need to be synced + uint64_t states; }; lsm_error lsm_entry_handle_init(lsm_entry_handle **out); @@ -100,10 +105,17 @@ lsm_error lsm_store_load_db(lsm_store *store); lsm_error lsm_entry_disk_insert(lsm_entry_handle *handle); /** - * Remove an entry from the database + * Remove an entry from the database. * * @param handle handle to the removed entry */ lsm_error lsm_entry_disk_remove(lsm_entry_handle *handle); +/** + * Update an existing entry already in the store. 
+ * + * @param handle to updated entry + */ +lsm_error lsm_entry_disk_update(lsm_entry_handle *handle); + #endif diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 022e68b..8afc56a 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -151,7 +151,7 @@ lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, handle->store = store; // Newly inserted entries are always dirty - handle->dirty = true; + handle->states |= lsm_entry_handle_state_new; *out = handle; @@ -192,7 +192,7 @@ lsm_error lsm_entry_data_append(lsm_entry_handle *handle, lsm_str *data) { } entry->data_len = new_len; - handle->dirty = true; + handle->states |= lsm_entry_handle_state_updated; return lsm_error_ok; } diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c index 5c71dee..eaaed32 100644 --- a/lsm/src/store/lsm_store_disk_read.c +++ b/lsm/src/store/lsm_store_disk_read.c @@ -173,9 +173,7 @@ lsm_error lsm_store_load_db(lsm_store *store) { handle->wrapper->entry->idx_file_offset = idx_file_offset; - // We explicitely set the dirty flag here to prevent writing to the datase - // when reading it in - handle->dirty = false; + handle->states = 0; lsm_entry_close(handle); store->db_file_size += db_dim[1]; diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index 4623f36..8212ba6 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -53,8 +53,14 @@ void lsm_entry_close(lsm_entry_handle *handle) { } // TODO handle errors here - if (handle->dirty) { + if ((handle->states & lsm_entry_handle_state_new) && + !(handle->states & lsm_entry_handle_state_removed)) { lsm_entry_disk_insert(handle); + } else if ((handle->states & lsm_entry_handle_state_removed) && + !(handle->states & lsm_entry_handle_state_new)) { + /* lsm_entry_disk_remove(handle); */ + } else if (handle->states & lsm_entry_handle_state_updated) { + /* lsm_entry_disk_update(handle); */ } 
pthread_rwlock_unlock(&handle->wrapper->lock); @@ -157,7 +163,7 @@ lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, entry->attrs.count--; entry->attrs.bitmap[type / 64] &= ~(((uint64_t)1) << (type % 64)); - handle->dirty = true; + handle->states |= lsm_entry_handle_state_updated; return lsm_error_ok; } @@ -184,7 +190,7 @@ lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint8_t type, entry->attrs.count++; entry->attrs.bitmap[type / 64] |= ((uint64_t)1) << (type % 64); - handle->dirty = true; + handle->states |= lsm_entry_handle_state_updated; return lsm_error_ok; } From a6887d4094bb8e360179e5506cfbe200ba324858 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Fri, 10 Nov 2023 16:23:27 +0100 Subject: [PATCH 50/70] refactor(lsm): rename some variables --- lsm/src/_include/lsm/store_internal.h | 18 +++++++----- lsm/src/store/lsm_store.c | 4 +-- lsm/src/store/lsm_store_disk_read.c | 34 +++++++++++------------ lsm/src/store/lsm_store_disk_write.c | 40 +++++++++++++-------------- 4 files changed, 49 insertions(+), 47 deletions(-) diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index bb35772..fb50838 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -78,14 +78,18 @@ struct lsm_store { lsm_trie *trie; lsm_str *data_path; - FILE *db_file; - uint64_t db_file_size; - pthread_mutex_t db_lock; + struct { + FILE *f; + uint64_t size; + pthread_mutex_t lock; + } db; - FILE *idx_file; - uint64_t idx_file_block_count; - uint64_t idx_file_size; - pthread_mutex_t idx_lock; + struct { + FILE *f; + uint64_t size; + uint64_t block_count; + pthread_mutex_t lock; + } idx; }; /** diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 8afc56a..2185418 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -22,8 +22,8 @@ lsm_error lsm_store_init(lsm_store **ptr) { return res; } - pthread_mutex_init(&store->db_lock, NULL); - 
pthread_mutex_init(&store->idx_lock, NULL); + pthread_mutex_init(&store->db.lock, NULL); + pthread_mutex_init(&store->idx.lock, NULL); *ptr = store; diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c index eaaed32..8ce315c 100644 --- a/lsm/src/store/lsm_store_disk_read.c +++ b/lsm/src/store/lsm_store_disk_read.c @@ -70,8 +70,8 @@ lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { } store->data_path = data_path; - store->db_file = db_file; - store->idx_file = idx_file; + store->db.f = db_file; + store->idx.f = idx_file; LSM_RES(lsm_store_load_db(store)); @@ -144,53 +144,53 @@ lsm_error lsm_store_load_db(lsm_store *store) { lsm_entry_handle *handle; bool valid_entry; - rewind(store->idx_file); + rewind(store->idx.f); // idx file starts with block count - LSM_RES(lsm_fread(&store->idx_file_block_count, &store->idx_file_size, - store->idx_file, sizeof(uint64_t), 1)); + LSM_RES(lsm_fread(&store->idx.block_count, &store->idx.size, store->idx.f, + sizeof(uint64_t), 1)); - for (uint64_t i = 0; i < store->idx_file_block_count; i++) { - uint64_t idx_file_offset = store->idx_file_size; + for (uint64_t i = 0; i < store->idx.block_count; i++) { + uint64_t idx_file_offset = store->idx.size; - LSM_RES(lsm_fread(&valid_entry, &store->idx_file_size, store->idx_file, + LSM_RES(lsm_fread(&valid_entry, &store->idx.size, store->idx.f, sizeof(bool), 1)); if (valid_entry) { - LSM_RES(lsm_entry_read_str(&key, &store->idx_file_size, store->idx_file)); - LSM_RES(lsm_fread(&db_dim, &store->idx_file_size, store->idx_file, + LSM_RES(lsm_entry_read_str(&key, &store->idx.size, store->idx.f)); + LSM_RES(lsm_fread(&db_dim, &store->idx.size, store->idx.f, sizeof(uint64_t), 2)); LSM_RES(lsm_store_insert(&handle, store, key)); // Read attributes from database file - if (fseek(store->db_file, db_dim[0], SEEK_SET) != 0) { + if (fseek(store->db.f, db_dim[0], SEEK_SET) != 0) { return lsm_error_failed_io; } - 
LSM_RES(lsm_fread(&handle->wrapper->entry->data_len, NULL, store->db_file, + LSM_RES(lsm_fread(&handle->wrapper->entry->data_len, NULL, store->db.f, sizeof(uint64_t), 1)); - LSM_RES(lsm_entry_read_attrs(NULL, handle, store->db_file)); + LSM_RES(lsm_entry_read_attrs(NULL, handle, store->db.f)); handle->wrapper->entry->idx_file_offset = idx_file_offset; handle->states = 0; lsm_entry_close(handle); - store->db_file_size += db_dim[1]; + store->db.size += db_dim[1]; } // Simply skip the invalid entry else { uint64_t key_len; - LSM_RES(lsm_fread(&key_len, &store->idx_file_size, store->idx_file, + LSM_RES(lsm_fread(&key_len, &store->idx.size, store->idx.f, sizeof(uint64_t), 1)); uint64_t remaining = key_len + 2 * sizeof(uint64_t); - if (fseek(store->idx_file, remaining, SEEK_CUR) != 0) { + if (fseek(store->idx.f, remaining, SEEK_CUR) != 0) { return lsm_error_failed_io; } - store->idx_file_size += remaining; + store->idx.size += remaining; } } diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index ffe182f..3808bb8 100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -78,55 +78,53 @@ lsm_error lsm_write_idx_entry(uint64_t *size, FILE *idx_file, lsm_entry *entry, lsm_error lsm_entry_disk_insert(lsm_entry_handle *handle) { lsm_store *store = handle->store; - pthread_mutex_lock(&store->db_lock); + pthread_mutex_lock(&store->db.lock); - uint64_t db_entry_index = store->db_file_size; + uint64_t db_entry_index = store->db.size; uint64_t db_entry_size; - lsm_error res = - lsm_write_db_entry(&db_entry_size, store->db_file, handle->wrapper->entry, - store->db_file_size); - fflush(store->db_file); + lsm_error res = lsm_write_db_entry(&db_entry_size, store->db.f, + handle->wrapper->entry, store->db.size); + fflush(store->db.f); - pthread_mutex_unlock(&store->db_lock); + pthread_mutex_unlock(&store->db.lock); if (res != lsm_error_ok) { return res; } // Append entry to index file - 
pthread_mutex_lock(&store->idx_lock); + pthread_mutex_lock(&store->idx.lock); - uint64_t idx_entry_index = store->idx_file_size; + uint64_t idx_entry_index = store->idx.size; uint64_t idx_entry_size; - res = lsm_write_idx_entry(&idx_entry_size, store->idx_file, - handle->wrapper->entry, db_entry_index, - db_entry_size, store->idx_file_size); + res = + lsm_write_idx_entry(&idx_entry_size, store->idx.f, handle->wrapper->entry, + db_entry_index, db_entry_size, store->idx.size); if (res == lsm_error_ok) { // Update the counter at the beginning of the file - rewind(store->idx_file); + rewind(store->idx.f); - uint64_t new_block_count = store->idx_file_block_count + 1; + uint64_t new_block_count = store->idx.block_count + 1; - res = lsm_fwrite(NULL, store->idx_file, sizeof(uint64_t), 1, - &new_block_count); + res = lsm_fwrite(NULL, store->idx.f, sizeof(uint64_t), 1, &new_block_count); if (res == lsm_error_ok) { // Only if we successfully updated the on-disk counter do we make the code // aware that the files' sizes have increased. This way, if a write to the // counter fails, the code will simply reuse the already written content. 
- store->idx_file_size += idx_entry_size; - store->idx_file_block_count = new_block_count; - store->db_file_size += db_entry_size; + store->idx.size += idx_entry_size; + store->idx.block_count = new_block_count; + store->db.size += db_entry_size; handle->wrapper->entry->idx_file_offset = idx_entry_index; } } - fflush(store->idx_file); - pthread_mutex_unlock(&store->idx_lock); + fflush(store->idx.f); + pthread_mutex_unlock(&store->idx.lock); return res; } From 51e4a203e988b5ca03690fc8a046b2346e8d5d5b Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sat, 11 Nov 2023 13:10:14 +0100 Subject: [PATCH 51/70] feat(lsm): move keys to db file; idx entries are now constant length --- lsm/src/store/lsm_store_disk_read.c | 77 ++++++++++++++-------------- lsm/src/store/lsm_store_disk_write.c | 14 ++--- 2 files changed, 43 insertions(+), 48 deletions(-) diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c index 8ce315c..72e34bd 100644 --- a/lsm/src/store/lsm_store_disk_read.c +++ b/lsm/src/store/lsm_store_disk_read.c @@ -137,12 +137,41 @@ static lsm_error lsm_entry_read_attrs(uint64_t *sum, lsm_entry_handle *handle, return lsm_error_ok; } +static lsm_error lsm_fseek(FILE *f, uint64_t pos) { + if (fseek(f, pos, SEEK_SET) != 0) { + return lsm_error_failed_io; + } + + return lsm_error_ok; +} + +/** + * Insert a new entry by reading it from the db file + */ +lsm_error lsm_store_insert_from_db(lsm_store *store, uint64_t pos, + uint64_t idx_file_offset) { + LSM_RES(lsm_fseek(store->db.f, pos)); + + lsm_str *key; + LSM_RES(lsm_entry_read_str(&key, &store->db.size, store->db.f)); + + lsm_entry_handle *handle; + LSM_RES(lsm_store_insert(&handle, store, key)); + + LSM_RES(lsm_fread(&handle->wrapper->entry->data_len, &store->db.size, + store->db.f, sizeof(uint64_t), 1)); + LSM_RES(lsm_entry_read_attrs(&store->db.size, handle, store->db.f)); + + handle->wrapper->entry->idx_file_offset = idx_file_offset; + + handle->states = 0; + lsm_entry_close(handle); + 
+ return lsm_error_ok; +} lsm_error lsm_store_load_db(lsm_store *store) { uint64_t db_dim[2]; - lsm_str *key; - lsm_entry_handle *handle; - bool valid_entry; rewind(store->idx.f); @@ -153,45 +182,15 @@ lsm_error lsm_store_load_db(lsm_store *store) { for (uint64_t i = 0; i < store->idx.block_count; i++) { uint64_t idx_file_offset = store->idx.size; - LSM_RES(lsm_fread(&valid_entry, &store->idx.size, store->idx.f, - sizeof(bool), 1)); + LSM_RES(lsm_fread(&db_dim, &store->idx.size, store->idx.f, sizeof(uint64_t), + 2)); - if (valid_entry) { - LSM_RES(lsm_entry_read_str(&key, &store->idx.size, store->idx.f)); - LSM_RES(lsm_fread(&db_dim, &store->idx.size, store->idx.f, - sizeof(uint64_t), 2)); - LSM_RES(lsm_store_insert(&handle, store, key)); - - // Read attributes from database file - if (fseek(store->db.f, db_dim[0], SEEK_SET) != 0) { - return lsm_error_failed_io; - } - - LSM_RES(lsm_fread(&handle->wrapper->entry->data_len, NULL, store->db.f, - sizeof(uint64_t), 1)); - LSM_RES(lsm_entry_read_attrs(NULL, handle, store->db.f)); - - handle->wrapper->entry->idx_file_offset = idx_file_offset; - - handle->states = 0; - lsm_entry_close(handle); - - store->db.size += db_dim[1]; + // We zero out the length of entries if they're no longer valid + if (db_dim[1] == 0) { + continue; } - // Simply skip the invalid entry - else { - uint64_t key_len; - LSM_RES(lsm_fread(&key_len, &store->idx.size, store->idx.f, - sizeof(uint64_t), 1)); - uint64_t remaining = key_len + 2 * sizeof(uint64_t); - - if (fseek(store->idx.f, remaining, SEEK_CUR) != 0) { - return lsm_error_failed_io; - } - - store->idx.size += remaining; - } + LSM_RES(lsm_store_insert_from_db(store, db_dim[0], idx_file_offset)); } return lsm_error_ok; diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index 3808bb8..acb0015 100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -47,6 +47,7 @@ lsm_error lsm_write_db_entry(uint64_t *size, FILE 
*db_file, lsm_entry *entry, LSM_RES(lsm_fseek(db_file, pos)); + LSM_RES(lsm_write_str(size, db_file, entry->key)); LSM_RES(lsm_fwrite(size, db_file, sizeof(uint64_t), 1, &entry->data_len)); LSM_RES(lsm_fwrite(size, db_file, sizeof(uint8_t), 1, &entry->attrs.count)); @@ -59,16 +60,12 @@ lsm_error lsm_write_db_entry(uint64_t *size, FILE *db_file, lsm_entry *entry, return lsm_error_ok; } -lsm_error lsm_write_idx_entry(uint64_t *size, FILE *idx_file, lsm_entry *entry, - uint64_t offset, uint64_t len, uint64_t pos) { +lsm_error lsm_write_idx_entry(uint64_t *size, FILE *idx_file, uint64_t offset, + uint64_t len, uint64_t pos) { *size = 0; LSM_RES(lsm_fseek(idx_file, pos)); - bool valid_entry_marker = true; - LSM_RES(lsm_fwrite(size, idx_file, sizeof(bool), 1, &valid_entry_marker)); - - LSM_RES(lsm_write_str(size, idx_file, entry->key)); LSM_RES(lsm_fwrite(size, idx_file, sizeof(uint64_t), 1, &offset)); LSM_RES(lsm_fwrite(size, idx_file, sizeof(uint64_t), 1, &len)); @@ -99,9 +96,8 @@ lsm_error lsm_entry_disk_insert(lsm_entry_handle *handle) { uint64_t idx_entry_index = store->idx.size; uint64_t idx_entry_size; - res = - lsm_write_idx_entry(&idx_entry_size, store->idx.f, handle->wrapper->entry, - db_entry_index, db_entry_size, store->idx.size); + res = lsm_write_idx_entry(&idx_entry_size, store->idx.f, db_entry_index, + db_entry_size, store->idx.size); if (res == lsm_error_ok) { // Update the counter at the beginning of the file From b40389bbe2d5eda10e751ea9ab557316ea9738d0 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 12 Nov 2023 12:48:36 +0100 Subject: [PATCH 52/70] feat(lsm): implement basic remove --- lsm/example/test.c | 17 ++++++++++------- lsm/include/lsm/store.h | 7 +++++++ lsm/src/_include/lsm/store_internal.h | 7 +++++++ lsm/src/store/lsm_store.c | 4 ++++ lsm/src/store/lsm_store_disk_write.c | 25 +++++++++++++++++++++++++ lsm/src/store/lsm_store_entry.c | 13 ++++++++++++- 6 files changed, 65 insertions(+), 8 deletions(-) diff --git a/lsm/example/test.c 
b/lsm/example/test.c index bd78f2e..660cd04 100644 --- a/lsm/example/test.c +++ b/lsm/example/test.c @@ -20,18 +20,13 @@ int main() { lsm_str *attr; lsm_str_init_copy(&attr, "some attribute value"); - lsm_entry_attr_insert(handle, lsm_attr_type_content_type, attr); + lsm_entry_attr_insert(handle, 1, attr); lsm_str *data; lsm_str_init_copy(&data, "hello"); for (int i = 0; i < 50; i++) { - lsm_entry_data_append(store, handle, data); - } - - if (lsm_entry_sync(store, handle) != lsm_error_ok) { - printf("godver"); - return 1; + lsm_entry_data_append(handle, data); } lsm_entry_close(handle); @@ -50,4 +45,12 @@ int main() { total += read; } printf("\n%lu", total); + + lsm_entry_close(handle); + + assert(lsm_store_open_write(&handle, store, key) == lsm_error_ok); + lsm_entry_remove(handle); + lsm_entry_close(handle); + + assert(lsm_store_open_read(&handle, store, key) == lsm_error_not_found); } diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index 31eb19b..45eda7e 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -176,6 +176,13 @@ void lsm_entry_close(lsm_entry_handle *handle); lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, lsm_str *key); +/** + * Mark the entry as removed. + * + * @param handle handle to entry to remove + */ +void lsm_entry_remove(lsm_entry_handle *handle); + /** * Append new data to the given entry, which is expected to be in the store. * diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index fb50838..afbd4f3 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -40,6 +40,13 @@ typedef struct lsm_entry { */ lsm_error lsm_entry_init(lsm_entry **ptr); +/** + * Deallocate an existing entry + * + * @param entry pointer to entry + */ +void lsm_entry_free(lsm_entry *entry); + /** * Deallocate an existing lsm_entry object. 
* diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 2185418..57eacb7 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -158,6 +158,10 @@ lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, return lsm_error_ok; } +void lsm_entry_remove(lsm_entry_handle *handle) { + handle->states |= lsm_entry_handle_state_removed; +} + lsm_error lsm_entry_data_append(lsm_entry_handle *handle, lsm_str *data) { if (lsm_str_len(data) == 0) { return lsm_error_ok; diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index acb0015..b5081af 100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -124,3 +124,28 @@ lsm_error lsm_entry_disk_insert(lsm_entry_handle *handle) { return res; } + +// Marking an entry as removed in the idx file is simply setting the length of +// its entry to zero +lsm_error lsm_entry_disk_remove(lsm_entry_handle *handle) { + lsm_store *store = handle->store; + lsm_entry *entry = handle->wrapper->entry; + + pthread_mutex_lock(&store->idx.lock); + + lsm_error res = + lsm_fseek(store->idx.f, entry->idx_file_offset + sizeof(uint64_t)); + + if (res != lsm_error_ok) { + pthread_mutex_unlock(&store->idx.lock); + + return res; + } + + uint64_t val = 0; + res = lsm_fwrite(NULL, store->idx.f, sizeof(uint64_t), 1, &val); + + pthread_mutex_unlock(&store->idx.lock); + + return res; +} diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index 8212ba6..fd55624 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -19,6 +19,14 @@ lsm_error lsm_entry_init(lsm_entry **ptr) { return lsm_error_ok; } +void lsm_entry_free(lsm_entry *entry) { + if (entry->attrs.count > 0) { + free(entry->attrs.items); + } + + free(entry); +} + lsm_error lsm_entry_wrapper_init(lsm_entry_wrapper **ptr) { lsm_entry_wrapper *wrap = calloc(1, sizeof(lsm_entry_wrapper)); @@ -58,7 +66,10 @@ void 
lsm_entry_close(lsm_entry_handle *handle) { lsm_entry_disk_insert(handle); } else if ((handle->states & lsm_entry_handle_state_removed) && !(handle->states & lsm_entry_handle_state_new)) { - /* lsm_entry_disk_remove(handle); */ + lsm_entry_disk_remove(handle); + + lsm_entry_free(handle->wrapper->entry); + handle->wrapper->entry = NULL; } else if (handle->states & lsm_entry_handle_state_updated) { /* lsm_entry_disk_update(handle); */ } From c8728f2371980c5a5898c4ad68b484d289de1694 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 12 Nov 2023 13:19:30 +0100 Subject: [PATCH 53/70] refactor(lsm): abstract determining entry data path --- lsm/src/_include/lsm/store_internal.h | 12 ++++++++++++ lsm/src/store/lsm_store.c | 14 ++++++-------- lsm/src/store/lsm_store_entry.c | 22 ++++++++++++++++++++++ 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index afbd4f3..f356bd4 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -10,6 +10,7 @@ #define LSM_DB_FILE_NAME "lsm.db" #define LSM_IDX_FILE_NAME "lsm.idx" +#define LSM_DATA_FILE_SUFFIX ".data" typedef struct lsm_attr { uint8_t type; @@ -129,4 +130,15 @@ lsm_error lsm_entry_disk_remove(lsm_entry_handle *handle); */ lsm_error lsm_entry_disk_update(lsm_entry_handle *handle); +/** + * Return the length of the path to this entry's data file + */ +uint64_t lsm_entry_data_path_len(lsm_entry_handle *handle); + +/** + * Fill in the entry's data file path in the provided buffer. 
Use + * `lsm_entry_data_path_len` to allocate an appropriately-sized buffer + */ +void lsm_entry_data_path(char *buf, lsm_entry_handle *handle); + #endif diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 57eacb7..f93e289 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -174,11 +174,10 @@ lsm_error lsm_entry_data_append(lsm_entry_handle *handle, lsm_str *data) { // Entries don't open their file unless needed if (handle->f == NULL) { - char path[handle->store->data_path->len + entry->key->len + 2]; - sprintf(path, "%s/%s", lsm_str_ptr(handle->store->data_path), - lsm_str_ptr(entry->key)); + char data_path[lsm_entry_data_path_len(handle) + 1]; + lsm_entry_data_path(data_path, handle); - FILE *f = fopen(path, "ab"); + FILE *f = fopen(data_path, "ab"); if (f == NULL) { return lsm_error_failed_io; @@ -213,11 +212,10 @@ lsm_error lsm_entry_data_read(uint64_t *out, char *buf, // Entries don't open their file unless needed if (handle->f == NULL) { - char path[handle->store->data_path->len + entry->key->len + 2]; - sprintf(path, "%s/%s", lsm_str_ptr(handle->store->data_path), - lsm_str_ptr(entry->key)); + char data_path[lsm_entry_data_path_len(handle) + 1]; + lsm_entry_data_path(data_path, handle); - FILE *f = fopen(path, "rb"); + FILE *f = fopen(data_path, "rb"); if (f == NULL) { return lsm_error_failed_io; diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index fd55624..a9c0ade 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -227,3 +227,25 @@ lsm_error lsm_entry_attr_insert_uint8_t(lsm_entry_handle *handle, uint8_t type, uint64_t lsm_entry_data_len(lsm_entry_handle *handle) { return handle->wrapper->entry->data_len; } + +uint64_t lsm_entry_data_path_len(lsm_entry_handle *handle) { + // [data path]/[entry key][data file suffix] + return lsm_str_len(handle->store->data_path) + + lsm_str_len(handle->wrapper->entry->key) + + strlen(LSM_DATA_FILE_SUFFIX) + 1; +} 
+void lsm_entry_data_path(char *buf, lsm_entry_handle *handle) { + lsm_str *data_path = handle->store->data_path; + lsm_str *key = handle->wrapper->entry->key; + + memcpy(buf, lsm_str_ptr(data_path), lsm_str_len(data_path)); + + uint64_t index = lsm_str_len(data_path); + buf[index] = '/'; + + index += 1; + memcpy(&buf[index], lsm_str_ptr(key), lsm_str_len(key)); + + index += lsm_str_len(key); + strcpy(&buf[index], LSM_DATA_FILE_SUFFIX); +} From a4ad8c246e2d59940fdbabb9db4a7e79638204dc Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 12 Nov 2023 13:21:04 +0100 Subject: [PATCH 54/70] feat(lsm): remove data file when removing entry --- lsm/src/store/lsm_store_disk_write.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index b5081af..243c8f5 100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -147,5 +147,24 @@ lsm_error lsm_entry_disk_remove(lsm_entry_handle *handle) { pthread_mutex_unlock(&store->idx.lock); - return res; + if (res != lsm_error_ok) { + return res; + } + + // Remove data file if present + if (entry->data_len > 0) { + if (handle->f != NULL) { + fclose(handle->f); + handle->f = NULL; + } + + char data_path[lsm_entry_data_path_len(handle) + 1]; + lsm_entry_data_path(data_path, handle); + + if (remove(data_path) != 0) { + return lsm_error_failed_io; + } + } + + return lsm_error_ok; } From 3d48ee8019e9a84e7dd34527b19c4102d1e469b2 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 12 Nov 2023 13:43:21 +0100 Subject: [PATCH 55/70] feat(lander): support DELETE requests --- include/lander.h | 4 +++- lsm/src/store/lsm_store_disk_write.c | 2 ++ src/lander/lander.c | 8 ++++++++ src/lander/lander_delete.c | 29 ++++++++++++++++++++++++++++ 4 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 src/lander/lander_delete.c diff --git a/include/lander.h b/include/lander.h index 
88bfab9..61c4297 100644 --- a/include/lander.h +++ b/include/lander.h @@ -4,7 +4,7 @@ #include "http_loop.h" #include "lsm/store.h" -extern http_route lander_routes[4]; +extern http_route lander_routes[5]; typedef struct lander_gctx { const char *data_dir; @@ -57,4 +57,6 @@ bool lander_get_entry_lsm(event_loop_conn *conn); bool lander_post_redirect_body_to_attr(event_loop_conn *conn); +bool lander_remove_entry(event_loop_conn *conn); + #endif diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index 243c8f5..b6906e6 100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -151,6 +151,8 @@ lsm_error lsm_entry_disk_remove(lsm_entry_handle *handle) { return res; } + fflush(store->idx.f); + // Remove data file if present if (entry->data_len > 0) { if (handle->f != NULL) { diff --git a/src/lander/lander.c b/src/lander/lander.c index 57f5c5e..a045428 100644 --- a/src/lander/lander.c +++ b/src/lander/lander.c @@ -19,6 +19,14 @@ http_route lander_routes[] = { .steps_res = {http_loop_step_write_header, lander_stream_body_to_client, NULL}, }, + { + .type = http_route_regex, + .method = http_delete, + .path = "^/([^/]+)$", + .steps = {http_loop_step_auth, lander_remove_entry, NULL}, + .steps_res = {http_loop_step_write_header, http_loop_step_write_body, + NULL}, + }, { .type = http_route_regex, .method = http_post, diff --git a/src/lander/lander_delete.c b/src/lander/lander_delete.c new file mode 100644 index 0000000..e91b6c9 --- /dev/null +++ b/src/lander/lander_delete.c @@ -0,0 +1,29 @@ +#include "lander.h" + +bool lander_remove_entry(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + http_loop_gctx *gctx = ctx->g; + lander_gctx *c_gctx = gctx->c; + + const char *key_s = &ctx->req.path[ctx->req.regex_groups[1].rm_so]; + int key_len = ctx->req.regex_groups[1].rm_eo - ctx->req.regex_groups[1].rm_so; + + lsm_str *key; + lsm_str_init_copy_n(&key, (char *)key_s, 
key_len); + + switch (lsm_store_open_write(&c_ctx->entry, c_gctx->store, key)) { + case lsm_error_ok: + break; + case lsm_error_not_found: + ctx->res.status = http_not_found; + return true; + default: + ctx->res.status = http_internal_server_error; + return true; + } + + lsm_entry_remove(c_ctx->entry); + + return true; +} From 7fac278eada77bb49930ce5d62aacd0c2afdecdf Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 12 Nov 2023 13:57:11 +0100 Subject: [PATCH 56/70] feat(lander): introduce file entry type --- include/lander.h | 5 ++- landerctl | 8 ++++ src/lander/lander.c | 7 ++++ src/lander/lander_get.c | 91 ++++++++++++++++++++++++++-------------- src/lander/lander_post.c | 15 +++++++ 5 files changed, 93 insertions(+), 33 deletions(-) diff --git a/include/lander.h b/include/lander.h index 61c4297..2d8f8e5 100644 --- a/include/lander.h +++ b/include/lander.h @@ -4,7 +4,7 @@ #include "http_loop.h" #include "lsm/store.h" -extern http_route lander_routes[5]; +extern http_route lander_routes[6]; typedef struct lander_gctx { const char *data_dir; @@ -27,6 +27,7 @@ typedef enum lander_attr_type : uint8_t { typedef enum lander_entry_type : uint8_t { lander_entry_type_redirect = 0, lander_entry_type_paste = 1, + lander_entry_type_file = 2, } lander_entry_type; void *lander_gctx_init(); @@ -59,4 +60,6 @@ bool lander_post_redirect_body_to_attr(event_loop_conn *conn); bool lander_remove_entry(event_loop_conn *conn); +bool lander_post_file_lsm(event_loop_conn *conn); + #endif diff --git a/landerctl b/landerctl index c0433a5..f7bdded 100755 --- a/landerctl +++ b/landerctl @@ -38,4 +38,12 @@ elif [ "$1" = pl ]; then -H "X-Api-Key: $API_KEY" \ --data-binary @"$2" \ "$URL/pl/$3" + +elif [ "$1" = f ]; then + curl \ + -w "${URL}%header{location}" \ + -XPOST \ + -H "X-Api-Key: $API_KEY" \ + --data-binary @"$2" \ + "$URL/f/$3" fi diff --git a/src/lander/lander.c b/src/lander/lander.c index a045428..ba34ea1 100644 --- a/src/lander/lander.c +++ b/src/lander/lander.c @@ -44,6 
+44,13 @@ http_route lander_routes[] = { lander_post_paste_lsm, lander_stream_body_to_entry, NULL}, .steps_res = {http_loop_step_write_header, http_loop_step_write_body, NULL}}, + {.type = http_route_regex, + .method = http_post, + .path = "^/f(l?)/([^/]*)$", + .steps = {http_loop_step_auth, http_loop_step_parse_content_length, + lander_post_file_lsm, lander_stream_body_to_entry, NULL}, + .steps_res = {http_loop_step_write_header, http_loop_step_write_body, + NULL}}, }; void *lander_gctx_init() { return calloc(1, sizeof(lander_gctx)); } diff --git a/src/lander/lander_get.c b/src/lander/lander_get.c index 7c467b5..03076c8 100644 --- a/src/lander/lander_get.c +++ b/src/lander/lander_get.c @@ -26,6 +26,55 @@ bool lander_get_index(event_loop_conn *conn) { return true; } +void lander_get_redirect(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + // For redirects, the URL is stored as an in-memory attribute + lsm_str *url_attr_val; + + // This shouldn't be able to happen + if (lsm_entry_attr_get(&url_attr_val, c_ctx->entry, lander_attr_type_url) != + lsm_error_ok) { + error("Entry of type redirect detected without URL attribute"); + + ctx->res.status = http_internal_server_error; + lsm_entry_close(c_ctx->entry); + c_ctx->entry = NULL; + + return; + } + + char *buf = malloc(lsm_str_len(url_attr_val) + 1); + memcpy(buf, lsm_str_ptr(url_attr_val), lsm_str_len(url_attr_val)); + + buf[lsm_str_len(url_attr_val)] = '\0'; + + ctx->res.status = http_moved_permanently; + http_res_add_header(&ctx->res, http_header_location, buf, true); + + // We no longer need the entry at this point, so we can unlock it early + // This will also signal to the response code not to read any data from + // the entry + lsm_entry_close(c_ctx->entry); + c_ctx->entry = NULL; +} + +void lander_get_paste(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + ctx->res.body.expected_len = lsm_entry_data_len(c_ctx->entry); + 
http_res_set_mime_type(&ctx->res, http_mime_txt); +} + +void lander_get_file(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + ctx->res.body.expected_len = lsm_entry_data_len(c_ctx->entry); +} + bool lander_get_entry_lsm(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; lander_ctx *c_ctx = ctx->c; @@ -53,38 +102,16 @@ bool lander_get_entry_lsm(event_loop_conn *conn) { lsm_entry_attr_get_uint8_t((uint8_t *)&t, c_ctx->entry, lander_attr_type_entry_type); - if (t == lander_entry_type_redirect) { - // For redirects, the URL is stored as an in-memory attribute - lsm_str *url_attr_val; - - // This shouldn't be able to happen - if (lsm_entry_attr_get(&url_attr_val, c_ctx->entry, lander_attr_type_url) != - lsm_error_ok) { - error("Entry of type redirect detected without URL attribute"); - - ctx->res.status = http_internal_server_error; - lsm_entry_close(c_ctx->entry); - c_ctx->entry = NULL; - - return true; - } - - char *buf = malloc(lsm_str_len(url_attr_val) + 1); - memcpy(buf, lsm_str_ptr(url_attr_val), lsm_str_len(url_attr_val)); - - buf[lsm_str_len(url_attr_val)] = '\0'; - - ctx->res.status = http_moved_permanently; - http_res_add_header(&ctx->res, http_header_location, buf, true); - - // We no longer need the entry at this point, so we can unlock it early - // This will also signal to the response code not to read any data from - // the entry - lsm_entry_close(c_ctx->entry); - c_ctx->entry = NULL; - } else { - ctx->res.body.expected_len = lsm_entry_data_len(c_ctx->entry); - http_res_set_mime_type(&ctx->res, http_mime_txt); + switch (t) { + case lander_entry_type_redirect: + lander_get_redirect(conn); + break; + case lander_entry_type_paste: + lander_get_paste(conn); + break; + case lander_entry_type_file: + lander_get_file(conn); + break; } return true; diff --git a/src/lander/lander_post.c b/src/lander/lander_post.c index 429ea39..13679cc 100644 --- a/src/lander/lander_post.c +++ b/src/lander/lander_post.c @@ -164,6 
+164,21 @@ bool lander_post_paste_lsm(event_loop_conn *conn) { return true; } +bool lander_post_file_lsm(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + if (!lander_insert_entry(ctx)) { + conn->state = event_loop_conn_state_res; + return true; + } + + lsm_entry_attr_insert_uint8_t(c_ctx->entry, lander_attr_type_entry_type, + lander_entry_type_file); + + return true; +} + bool lander_stream_body_to_entry(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; lander_ctx *c_ctx = ctx->c; From 70f622d9f3329f565a7b229ab992c8c52265883f Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 12 Nov 2023 14:12:13 +0100 Subject: [PATCH 57/70] feat(lander): support sending extra attributes as custom headers --- include/lander.h | 6 ++++++ src/lander/lander.c | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/include/lander.h b/include/lander.h index 2d8f8e5..1bb38d3 100644 --- a/include/lander.h +++ b/include/lander.h @@ -62,4 +62,10 @@ bool lander_remove_entry(event_loop_conn *conn); bool lander_post_file_lsm(event_loop_conn *conn); +/** + * Parse any custom headers and add them as attributes to the context's LSM + * entry + */ +bool lander_headers_to_attrs(event_loop_conn *conn); + #endif diff --git a/src/lander/lander.c b/src/lander/lander.c index ba34ea1..79518b0 100644 --- a/src/lander/lander.c +++ b/src/lander/lander.c @@ -48,11 +48,19 @@ http_route lander_routes[] = { .method = http_post, .path = "^/f(l?)/([^/]*)$", .steps = {http_loop_step_auth, http_loop_step_parse_content_length, - lander_post_file_lsm, lander_stream_body_to_entry, NULL}, + lander_post_file_lsm, lander_headers_to_attrs, lander_stream_body_to_entry, NULL}, .steps_res = {http_loop_step_write_header, http_loop_step_write_body, NULL}}, }; +struct { + char *header; + lander_attr_type attr_type; +} header_to_attr_type[] = { + { "X-Lander-Content-Type", lander_attr_type_content_type }, + { NULL, 0 }, +}; + 
void *lander_gctx_init() { return calloc(1, sizeof(lander_gctx)); } void *lander_ctx_init() { return calloc(1, sizeof(lander_ctx)); } @@ -68,3 +76,29 @@ void lander_ctx_reset(lander_ctx *ctx) { } void lander_ctx_free(lander_ctx *ctx) { free(ctx); } + +bool lander_headers_to_attrs(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + for (size_t i = 0; i < ctx->req.num_headers; i++) { + struct phr_header *header = &ctx->req.headers[i]; + + int j = 0; + + while (header_to_attr_type[j].header != NULL) { + if (strncmp(header->name, header_to_attr_type[j].header, header->name_len) == 0) { + lsm_str *value; + lsm_str_init_copy_n(&value, (char *)header->value, header->value_len); + + lsm_entry_attr_insert(c_ctx->entry, header_to_attr_type[j].attr_type, value); + + break; + } + + j++; + } + } + + return true; +} From c026e13c44ef5334c58c7bc8c6a54846462c4b39 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 12 Nov 2023 14:29:46 +0100 Subject: [PATCH 58/70] feat(lander): server content-type header for file entries --- include/lander.h | 2 ++ landerctl | 1 + src/lander/lander.c | 42 +++++++++++++++++++++++++++++++++++------ src/lander/lander_get.c | 2 ++ 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/include/lander.h b/include/lander.h index 1bb38d3..a83cf74 100644 --- a/include/lander.h +++ b/include/lander.h @@ -68,4 +68,6 @@ bool lander_post_file_lsm(event_loop_conn *conn); */ bool lander_headers_to_attrs(event_loop_conn *conn); +bool lander_attrs_to_headers(event_loop_conn *conn); + #endif diff --git a/landerctl b/landerctl index f7bdded..d586942 100755 --- a/landerctl +++ b/landerctl @@ -44,6 +44,7 @@ elif [ "$1" = f ]; then -w "${URL}%header{location}" \ -XPOST \ -H "X-Api-Key: $API_KEY" \ + -H "X-Lander-Content-Type: $(file --mime-type --brief $2)" \ --data-binary @"$2" \ "$URL/f/$3" fi diff --git a/src/lander/lander.c b/src/lander/lander.c index 79518b0..11e5af1 100644 --- a/src/lander/lander.c +++ 
b/src/lander/lander.c @@ -1,5 +1,6 @@ #include +#include "http/types.h" #include "http_loop.h" #include "lander.h" #include "lsm/store.h" @@ -15,7 +16,7 @@ http_route lander_routes[] = { .type = http_route_regex, .method = http_get, .path = "^/([^/]+)$", - .steps = {lander_get_entry_lsm, NULL}, + .steps = {lander_get_entry_lsm, lander_attrs_to_headers, NULL}, .steps_res = {http_loop_step_write_header, lander_stream_body_to_client, NULL}, }, @@ -48,7 +49,8 @@ http_route lander_routes[] = { .method = http_post, .path = "^/f(l?)/([^/]*)$", .steps = {http_loop_step_auth, http_loop_step_parse_content_length, - lander_post_file_lsm, lander_headers_to_attrs, lander_stream_body_to_entry, NULL}, + lander_post_file_lsm, lander_headers_to_attrs, + lander_stream_body_to_entry, NULL}, .steps_res = {http_loop_step_write_header, http_loop_step_write_body, NULL}}, }; @@ -56,9 +58,11 @@ http_route lander_routes[] = { struct { char *header; lander_attr_type attr_type; + http_header header_type; } header_to_attr_type[] = { - { "X-Lander-Content-Type", lander_attr_type_content_type }, - { NULL, 0 }, + {"X-Lander-Content-Type", lander_attr_type_content_type, + http_header_content_type}, + {NULL, 0}, }; void *lander_gctx_init() { return calloc(1, sizeof(lander_gctx)); } @@ -87,11 +91,13 @@ bool lander_headers_to_attrs(event_loop_conn *conn) { int j = 0; while (header_to_attr_type[j].header != NULL) { - if (strncmp(header->name, header_to_attr_type[j].header, header->name_len) == 0) { + if (strncmp(header->name, header_to_attr_type[j].header, + header->name_len) == 0) { lsm_str *value; lsm_str_init_copy_n(&value, (char *)header->value, header->value_len); - lsm_entry_attr_insert(c_ctx->entry, header_to_attr_type[j].attr_type, value); + lsm_entry_attr_insert(c_ctx->entry, header_to_attr_type[j].attr_type, + value); break; } @@ -102,3 +108,27 @@ bool lander_headers_to_attrs(event_loop_conn *conn) { return true; } + +bool lander_attrs_to_headers(event_loop_conn *conn) { + http_loop_ctx *ctx 
= conn->ctx; + lander_ctx *c_ctx = ctx->c; + + int j = 0; + lsm_str *value; + + while (header_to_attr_type[j].header != NULL) { + if (lsm_entry_attr_get(&value, c_ctx->entry, + header_to_attr_type[j].attr_type) == lsm_error_ok) { + char *buf = malloc(lsm_str_len(value) + 1); + memcpy(buf, lsm_str_ptr(value), lsm_str_len(value)); + buf[lsm_str_len(value)] = '\0'; + + http_res_add_header(&ctx->res, header_to_attr_type[j].header_type, buf, + true); + } + + j++; + } + + return true; +} diff --git a/src/lander/lander_get.c b/src/lander/lander_get.c index 03076c8..dead552 100644 --- a/src/lander/lander_get.c +++ b/src/lander/lander_get.c @@ -92,9 +92,11 @@ bool lander_get_entry_lsm(event_loop_conn *conn) { break; case lsm_error_not_found: ctx->res.status = http_not_found; + conn->state = event_loop_conn_state_res; return true; default: ctx->res.status = http_internal_server_error; + conn->state = event_loop_conn_state_res; return true; } From 64af94ce7a2e40e8785d03838032c5f2a7de4b4b Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 12 Nov 2023 14:48:55 +0100 Subject: [PATCH 59/70] refactor(lander): clean up code a bit --- include/http_loop.h | 1 - include/lander.h | 20 +-- src/http_loop/http_loop_steps.c | 1 + src/lander/lander.c | 73 ++-------- src/lander/lander_get.c | 2 +- src/lander/lander_post.c | 146 +------------------- src/lander/lander_steps.c | 73 ++++++++++ src/main.c | 16 +-- src/main.cpp | 228 -------------------------------- 9 files changed, 103 insertions(+), 457 deletions(-) create mode 100644 src/lander/lander_steps.c delete mode 100644 src/main.cpp diff --git a/include/http_loop.h b/include/http_loop.h index e05ee22..131bd6e 100644 --- a/include/http_loop.h +++ b/include/http_loop.h @@ -7,7 +7,6 @@ #include "http/req.h" #include "http/res.h" #include "http/types.h" -#include "trie.h" // Max amount of steps a route can use #define HTTP_LOOP_MAX_STEPS 17 diff --git a/include/lander.h b/include/lander.h index a83cf74..de89437 100644 --- 
a/include/lander.h +++ b/include/lander.h @@ -5,17 +5,15 @@ #include "lsm/store.h" extern http_route lander_routes[6]; +extern const char lander_key_charset[]; typedef struct lander_gctx { const char *data_dir; - Trie *trie; lsm_store *store; - } lander_gctx; typedef struct lander_ctx { lsm_entry_handle *entry; - uint64_t remaining_data; } lander_ctx; typedef enum lander_attr_type : uint8_t { @@ -24,6 +22,14 @@ typedef enum lander_attr_type : uint8_t { lander_attr_type_url = 2, } lander_attr_type; +typedef struct { + char *header; + lander_attr_type attr_type; + http_header header_type; +} header_to_attr; + +extern header_to_attr header_to_attrs[]; + typedef enum lander_entry_type : uint8_t { lander_entry_type_redirect = 0, lander_entry_type_paste = 1, @@ -46,21 +52,19 @@ bool lander_post_redirect(event_loop_conn *conn); bool lander_post_paste(event_loop_conn *conn); -bool lander_post_paste_lsm(event_loop_conn *conn); +bool lander_post_paste(event_loop_conn *conn); -bool lander_post_redirect_lsm(event_loop_conn *conn); +bool lander_post_redirect(event_loop_conn *conn); bool lander_stream_body_to_entry(event_loop_conn *conn); bool lander_stream_body_to_client(event_loop_conn *conn); -bool lander_get_entry_lsm(event_loop_conn *conn); - bool lander_post_redirect_body_to_attr(event_loop_conn *conn); bool lander_remove_entry(event_loop_conn *conn); -bool lander_post_file_lsm(event_loop_conn *conn); +bool lander_post_file(event_loop_conn *conn); /** * Parse any custom headers and add them as attributes to the context's LSM diff --git a/src/http_loop/http_loop_steps.c b/src/http_loop/http_loop_steps.c index dfa8c96..bcfeae7 100644 --- a/src/http_loop/http_loop_steps.c +++ b/src/http_loop/http_loop_steps.c @@ -1,4 +1,5 @@ #include +#include #include "http_loop.h" #include "lander.h" diff --git a/src/lander/lander.c b/src/lander/lander.c index 11e5af1..06395cb 100644 --- a/src/lander/lander.c +++ b/src/lander/lander.c @@ -5,6 +5,9 @@ #include "lander.h" #include 
"lsm/store.h" +const char lander_key_charset[] = + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + http_route lander_routes[] = { {.type = http_route_literal, .method = http_get, @@ -16,7 +19,7 @@ http_route lander_routes[] = { .type = http_route_regex, .method = http_get, .path = "^/([^/]+)$", - .steps = {lander_get_entry_lsm, lander_attrs_to_headers, NULL}, + .steps = {lander_get_entry, lander_attrs_to_headers, NULL}, .steps_res = {http_loop_step_write_header, lander_stream_body_to_client, NULL}, }, @@ -32,7 +35,7 @@ http_route lander_routes[] = { .type = http_route_regex, .method = http_post, .path = "^/s(l?)/([^/]*)$", - .steps = {http_loop_step_auth, lander_post_redirect_lsm, + .steps = {http_loop_step_auth, lander_post_redirect, http_loop_step_body_to_buf, lander_post_redirect_body_to_attr, NULL}, .steps_res = {http_loop_step_write_header, http_loop_step_write_body, @@ -42,27 +45,23 @@ http_route lander_routes[] = { .method = http_post, .path = "^/p(l?)/([^/]*)$", .steps = {http_loop_step_auth, http_loop_step_parse_content_length, - lander_post_paste_lsm, lander_stream_body_to_entry, NULL}, + lander_post_paste, lander_stream_body_to_entry, NULL}, .steps_res = {http_loop_step_write_header, http_loop_step_write_body, NULL}}, {.type = http_route_regex, .method = http_post, .path = "^/f(l?)/([^/]*)$", .steps = {http_loop_step_auth, http_loop_step_parse_content_length, - lander_post_file_lsm, lander_headers_to_attrs, + lander_post_file, lander_headers_to_attrs, lander_stream_body_to_entry, NULL}, .steps_res = {http_loop_step_write_header, http_loop_step_write_body, NULL}}, }; -struct { - char *header; - lander_attr_type attr_type; - http_header header_type; -} header_to_attr_type[] = { +header_to_attr header_to_attrs[] = { {"X-Lander-Content-Type", lander_attr_type_content_type, http_header_content_type}, - {NULL, 0}, + {NULL, 0, 0}, }; void *lander_gctx_init() { return calloc(1, sizeof(lander_gctx)); } @@ -75,60 +74,6 @@ void 
lander_ctx_reset(lander_ctx *ctx) { ctx->entry = NULL; } - - ctx->remaining_data = 0; } void lander_ctx_free(lander_ctx *ctx) { free(ctx); } - -bool lander_headers_to_attrs(event_loop_conn *conn) { - http_loop_ctx *ctx = conn->ctx; - lander_ctx *c_ctx = ctx->c; - - for (size_t i = 0; i < ctx->req.num_headers; i++) { - struct phr_header *header = &ctx->req.headers[i]; - - int j = 0; - - while (header_to_attr_type[j].header != NULL) { - if (strncmp(header->name, header_to_attr_type[j].header, - header->name_len) == 0) { - lsm_str *value; - lsm_str_init_copy_n(&value, (char *)header->value, header->value_len); - - lsm_entry_attr_insert(c_ctx->entry, header_to_attr_type[j].attr_type, - value); - - break; - } - - j++; - } - } - - return true; -} - -bool lander_attrs_to_headers(event_loop_conn *conn) { - http_loop_ctx *ctx = conn->ctx; - lander_ctx *c_ctx = ctx->c; - - int j = 0; - lsm_str *value; - - while (header_to_attr_type[j].header != NULL) { - if (lsm_entry_attr_get(&value, c_ctx->entry, - header_to_attr_type[j].attr_type) == lsm_error_ok) { - char *buf = malloc(lsm_str_len(value) + 1); - memcpy(buf, lsm_str_ptr(value), lsm_str_len(value)); - buf[lsm_str_len(value)] = '\0'; - - http_res_add_header(&ctx->res, header_to_attr_type[j].header_type, buf, - true); - } - - j++; - } - - return true; -} diff --git a/src/lander/lander_get.c b/src/lander/lander_get.c index dead552..bd62a16 100644 --- a/src/lander/lander_get.c +++ b/src/lander/lander_get.c @@ -75,7 +75,7 @@ void lander_get_file(event_loop_conn *conn) { ctx->res.body.expected_len = lsm_entry_data_len(c_ctx->entry); } -bool lander_get_entry_lsm(event_loop_conn *conn) { +bool lander_get_entry(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; lander_ctx *c_ctx = ctx->c; http_loop_gctx *gctx = ctx->g; diff --git a/src/lander/lander_post.c b/src/lander/lander_post.c index 13679cc..cdde6d0 100644 --- a/src/lander/lander_post.c +++ b/src/lander/lander_post.c @@ -5,71 +5,15 @@ #include "lsm/store.h" static void 
randomize_key(char *key, int len) { + size_t charset_len = strlen(lander_key_charset); + for (int i = 0; i < len; i++) { - key[i] = charset[rand() % charset_len]; + key[i] = lander_key_charset[rand() % charset_len]; } key[len] = '\0'; } -// TODO entry leaks if key is already present -static bool add_entry(char **key_ptr, int *key_len_ptr, http_loop_ctx *ctx, - Entry *entry, bool random) { - lander_gctx *c_gctx = ctx->g->c; - - // The first match group matches the "long" path - bool secure = - (ctx->req.regex_groups[1].rm_eo - ctx->req.regex_groups[1].rm_so) == 1; - - char *key; - int key_len = 0; - TrieExitCode res; - - if (random) { - res = trie_add_random(c_gctx->trie, &key, entry, secure); - - if (res == Ok) { - key_len = strlen(key); - } - } else { - key = (char *)&ctx->req.path[ctx->req.regex_groups[2].rm_so]; - key_len = ctx->req.regex_groups[2].rm_eo - ctx->req.regex_groups[2].rm_so; - - res = trie_add_len(c_gctx->trie, key, key_len, entry); - } - - switch (res) { - case Ok: - break; - case AlreadyPresent: - ctx->res.status = http_conflict; - return false; - default: - ctx->res.status = http_internal_server_error; - return false; - } - - // Add a slash to the key and add it as the location header - char *buf = malloc(key_len + 2); - - memcpy(&buf[1], key, key_len); - buf[0] = '/'; - buf[key_len + 1] = '\0'; - - http_res_add_header(&ctx->res, http_header_location, buf, true); - ctx->res.status = http_created; - - if (key_ptr != NULL) { - *key_ptr = key; - } - - if (key_len_ptr != NULL) { - *key_len_ptr = key_len; - } - - return true; -} - /** * Insert a new entry into the store. 
* @@ -123,7 +67,7 @@ bool lander_insert_entry(http_loop_ctx *ctx) { return true; } -bool lander_post_redirect_lsm(event_loop_conn *conn) { +bool lander_post_redirect(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; lander_ctx *c_ctx = ctx->c; @@ -149,7 +93,7 @@ bool lander_post_redirect_body_to_attr(event_loop_conn *conn) { return true; } -bool lander_post_paste_lsm(event_loop_conn *conn) { +bool lander_post_paste(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; lander_ctx *c_ctx = ctx->c; @@ -164,7 +108,7 @@ bool lander_post_paste_lsm(event_loop_conn *conn) { return true; } -bool lander_post_file_lsm(event_loop_conn *conn) { +bool lander_post_file(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; lander_ctx *c_ctx = ctx->c; @@ -178,81 +122,3 @@ bool lander_post_file_lsm(event_loop_conn *conn) { return true; } - -bool lander_stream_body_to_entry(event_loop_conn *conn) { - http_loop_ctx *ctx = conn->ctx; - lander_ctx *c_ctx = ctx->c; - - uint64_t to_append = - MIN(conn->rbuf_size - conn->rbuf_read, - ctx->req.body.expected_len - lsm_entry_data_len(c_ctx->entry)); - - lsm_str *data; - lsm_str_init_copy_n(&data, (char *)&conn->rbuf[conn->rbuf_read], to_append); - lsm_entry_data_append(c_ctx->entry, data); - - conn->rbuf_read += to_append; - - lsm_str_free(data); - - return lsm_entry_data_len(c_ctx->entry) == ctx->req.body.expected_len; -} - -bool lander_post_redirect(event_loop_conn *conn) { - http_loop_ctx *ctx = conn->ctx; - bool random = - ctx->req.regex_groups[2].rm_eo == ctx->req.regex_groups[2].rm_so; - - // Allocate a new buffer to pass to the trie - char *url = malloc(ctx->req.body.len + 1); - memcpy(url, ctx->req.body.buf, ctx->req.body.len); - url[ctx->req.body.len] = '\0'; - - Entry *new_entry = entry_new(Redirect, url); - - // The entry duplicates the string - free(url); - - // We don't check the result here, because we would perform the same action - // either way - char *key; - add_entry(&key, NULL, ctx, new_entry, random); - - if 
(random) { - free(key); - } - - conn->state = event_loop_conn_state_res; - - return true; -} - -bool lander_post_paste(event_loop_conn *conn) { - http_loop_ctx *ctx = conn->ctx; - lander_gctx *c_gctx = ctx->g->c; - - bool random = - ctx->req.regex_groups[2].rm_eo == ctx->req.regex_groups[2].rm_so; - - char *key; - int key_len; - Entry *new_entry = entry_new(Paste, ""); - - if (!add_entry(&key, &key_len, ctx, new_entry, random)) { - conn->state = event_loop_conn_state_res; - - return true; - } - - char *fname = malloc(strlen(c_gctx->data_dir) + 8 + key_len + 1); - sprintf(fname, "%s/pastes/%.*s", c_gctx->data_dir, key_len, key); - - ctx->req.body.fname = fname; - ctx->req.body.fname_owned = true; - - if (random) { - free(key); - } - - return true; -} diff --git a/src/lander/lander_steps.c b/src/lander/lander_steps.c new file mode 100644 index 0000000..c84f9d2 --- /dev/null +++ b/src/lander/lander_steps.c @@ -0,0 +1,73 @@ +#include + +#include "lander.h" + +bool lander_stream_body_to_entry(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + uint64_t to_append = + MIN(conn->rbuf_size - conn->rbuf_read, + ctx->req.body.expected_len - lsm_entry_data_len(c_ctx->entry)); + + lsm_str *data; + lsm_str_init_copy_n(&data, (char *)&conn->rbuf[conn->rbuf_read], to_append); + lsm_entry_data_append(c_ctx->entry, data); + + conn->rbuf_read += to_append; + + lsm_str_free(data); + + return lsm_entry_data_len(c_ctx->entry) == ctx->req.body.expected_len; +} + +bool lander_headers_to_attrs(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + for (size_t i = 0; i < ctx->req.num_headers; i++) { + struct phr_header *header = &ctx->req.headers[i]; + + int j = 0; + + while (header_to_attrs[j].header != NULL) { + if (strncmp(header->name, header_to_attrs[j].header, header->name_len) == + 0) { + lsm_str *value; + lsm_str_init_copy_n(&value, (char *)header->value, header->value_len); + + 
lsm_entry_attr_insert(c_ctx->entry, header_to_attrs[j].attr_type, + value); + + break; + } + + j++; + } + } + + return true; +} + +bool lander_attrs_to_headers(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + int j = 0; + lsm_str *value; + + while (header_to_attrs[j].header != NULL) { + if (lsm_entry_attr_get(&value, c_ctx->entry, + header_to_attrs[j].attr_type) == lsm_error_ok) { + char *buf = malloc(lsm_str_len(value) + 1); + memcpy(buf, lsm_str_ptr(value), lsm_str_len(value)); + buf[lsm_str_len(value)] = '\0'; + + http_res_add_header(&ctx->res, header_to_attrs[j].header_type, buf, true); + } + + j++; + } + + return true; +} diff --git a/src/main.c b/src/main.c index bf0dd9a..fa9c64f 100644 --- a/src/main.c +++ b/src/main.c @@ -34,20 +34,6 @@ int main() { critical(1, "Invalid TCP port %s", port_str); } - /* char file_path[strlen(data_dir) + 12 + 1]; */ - /* sprintf(file_path, "%s/lander.data", data_dir); */ - - /* info("Initializing trie from file '%s'", file_path); */ - - /* Trie *trie; */ - /* TrieExitCode res = trie_init(&trie, file_path); */ - - /* if (res != Ok) { */ - /* critical(1, "An error occured while populating the trie."); */ - /* } */ - - /* info("Trie initialized and populated with %i entries", trie_size(trie)); */ - lander_gctx *c_gctx = lander_gctx_init(); c_gctx->data_dir = data_dir_s; @@ -60,7 +46,7 @@ int main() { critical(2, "Failed to load existing store."); } - info("Store loaded containing %lu entries.", lsm_store_size(c_gctx->store)); + info("Store loaded containing %lu entries", lsm_store_size(c_gctx->store)); http_loop *hl = http_loop_init( lander_routes, sizeof(lander_routes) / sizeof(lander_routes[0]), c_gctx, diff --git a/src/main.cpp b/src/main.cpp deleted file mode 100644 index a36505f..0000000 --- a/src/main.cpp +++ /dev/null @@ -1,228 +0,0 @@ -#include -#include - -#include "crow.h" - -extern "C" { -#include "trie.h" -} - -static const std::string file_path = "lander.data"; -static 
const std::string index_page = R"( - - - -

r8r.be

-

This is the URL shortener and pastebin accompanying my site, The Rusty Bever.

- - -)"; - -#define ENV(var, env_var) \ - const char *_##var = getenv(env_var); \ - if (_##var == NULL) { \ - printf("Missing environment variable %s.\n", env_var); \ - return 1; \ - } \ - const std::string var = std::string(_##var); - -#define AUTH() \ - std::string provided_api_key = req.get_header_value("X-Api-Key"); \ - if (api_key.compare(provided_api_key) != 0) { \ - return crow::response(crow::status::UNAUTHORIZED); \ - } - -crow::response add_redirect(std::string base_url, Trie *trie, const char *url, - bool secure) { - Entry *new_entry = entry_new(Redirect, url); - - // The key already gets copied into the trie, so this pointer is safe to use - // ever after unlocking the trie - trie_wlock(trie); - char *key; - TrieExitCode res = trie_add_random(trie, &key, new_entry, secure); - trie_unlock(trie); - - if (res != Ok) { - return crow::response(crow::status::INTERNAL_SERVER_ERROR); - } - - std::string out = base_url + key; - free(key); - - return crow::response(out); -} - -bool store_paste(const char *key, const char *body) { - // Write paste contents to file - std::fstream file; - file.open(std::string("pastes/") + key, std::ios_base::out); - - if (!file.is_open()) { - return false; - } - - file << body; - file.close(); - - return true; -} - -crow::response add_paste(std::string base_url, Trie *trie, const char *body, - bool secure) { - Entry *new_entry = entry_new(Paste, ""); - - trie_wlock(trie); - char *key; - TrieExitCode res = trie_add_random(trie, &key, new_entry, secure); - trie_unlock(trie); - - if (res != Ok) { - return crow::response(crow::status::INTERNAL_SERVER_ERROR); - } - - if (!store_paste(key, body)) { - return crow::response(crow::status::INTERNAL_SERVER_ERROR); - } - - std::string out = base_url + key; - free(key); - - return crow::response(out); -} - -int main() { - // Initialize random seed for generating URLs - srand(time(NULL)); - - ENV(api_key, "LANDER_API_KEY"); - ENV(base_url, "LANDER_BASE_URL"); - - std::cout << "Initializing trie 
from file '" << file_path << "'..." - << std::endl; - - // Initialize trie and populate from data file - Trie *trie; - int res = trie_init(&trie, file_path.c_str()); - - if (res != 0) { - std::cout << "An error occured while initializing the trie." << std::endl; - - exit(1); - } - - std::cout << "Added " << trie_size(trie) << " entries to trie." << std::endl; - - // Create pastes directory if not present - // TODO don't just ignore errors here - mkdir("pastes", 0700); - - crow::SimpleApp app; - app.loglevel(crow::LogLevel::Info); - - CROW_ROUTE(app, "/").methods(crow::HTTPMethod::Get)( - []() { return crow::response("html", index_page); }); - - // Serve an entry - CROW_ROUTE(app, "/") - .methods(crow::HTTPMethod::Get)( - [trie](crow::response &res, std::string key) { - trie_rlock(trie); - Entry *entry; - TrieExitCode status = trie_search(trie, &entry, key.c_str()); - - if (status == Ok) { - if (entry->type == Redirect) { - res.redirect(entry->string); - } else if (entry->type == Paste) { - res.set_static_file_info("pastes/" + key); - } - } else { - res.code = 404; - } - - res.end(); - trie_unlock(trie); - }); - - // Add a new Redirect with a short randomly generated key - CROW_ROUTE(app, "/s/") - .methods(crow::HTTPMethod::Post)( - [api_key, base_url, trie](const crow::request req) { - AUTH(); - - return add_redirect(base_url, trie, req.body.c_str(), false); - }); - - // Add a new Redirect with a long randomly generated key - CROW_ROUTE(app, "/sl/") - .methods(crow::HTTPMethod::Post)( - [api_key, base_url, trie](const crow::request req) { - AUTH(); - - return add_redirect(base_url, trie, req.body.c_str(), true); - }); - - // Add a new Redirect with a given key - CROW_ROUTE(app, "/s/") - .methods(crow::HTTPMethod::Post)( - [api_key, base_url, trie](const crow::request &req, std::string key) { - AUTH(); - - Entry *new_entry = entry_new(Redirect, req.body.c_str()); - - trie_wlock(trie); - TrieExitCode status = trie_add(trie, key.c_str(), new_entry); - 
trie_unlock(trie); - - switch (status) { - case Ok: - return crow::response(base_url + key); - case AlreadyPresent: - return crow::response(crow::status::CONFLICT); - default: - return crow::response(crow::status::INTERNAL_SERVER_ERROR); - } - }); - - // Add a new Paste with a short randomly generated key - CROW_ROUTE(app, "/p/") - .methods(crow::HTTPMethod::Post)( - [api_key, base_url, trie](const crow::request &req) { - AUTH(); - - return add_paste(base_url, trie, req.body.c_str(), false); - }); - - // Add a new Paste with a long randomly generated key - CROW_ROUTE(app, "/pl/") - .methods(crow::HTTPMethod::Post)( - [api_key, base_url, trie](const crow::request &req) { - AUTH(); - - return add_paste(base_url, trie, req.body.c_str(), true); - }); - - // Add a paste with a given key - CROW_ROUTE(app, "/p/") - .methods(crow::HTTPMethod::Post)( - [api_key, base_url, trie](const crow::request &req, std::string key) { - AUTH(); - - Entry *new_entry = entry_new(Paste, ""); - trie_wlock(trie); - TrieExitCode status = trie_add(trie, key.c_str(), new_entry); - trie_unlock(trie); - - if (status != Ok) { - return crow::response(crow::status::CONFLICT); - } - - if (!store_paste(key.c_str(), req.body.c_str())) { - return crow::response(crow::status::INTERNAL_SERVER_ERROR); - } - - return crow::response(base_url + key); - }); - app.port(18080).multithreaded().run(); -} From 04aef2643fa6d4e0b9eb14a470cd8e7b3e004051 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 12 Nov 2023 15:00:20 +0100 Subject: [PATCH 60/70] chore: update changelog & landerctl --- CHANGELOG.md | 5 ++++- landerctl | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 993ae07..3795d4a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,9 +16,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Rewrite of trie codebase * Introduced a custom data store using an in-memory trie as index and a custom binary on-disk format 
- * Support for lookup & insert + * Support for lookup, insert & a basic remove * Lander * Replaced old trie implementation with LSM store + * Add support for hosting arbitrary files + * Content type of file is set if provided when uploading file + * Support removing entries ## [0.1.0](https://git.rustybever.be/Chewing_Bever/lander/src/tag/0.1.0) diff --git a/landerctl b/landerctl index d586942..74a7ed3 100755 --- a/landerctl +++ b/landerctl @@ -47,4 +47,10 @@ elif [ "$1" = f ]; then -H "X-Lander-Content-Type: $(file --mime-type --brief $2)" \ --data-binary @"$2" \ "$URL/f/$3" + +elif [ "$1" = d ]; then + curl \ + -XDELETE \ + -H "X-Api-Key: $API_KEY" \ + "$URL/$2" fi From 6a5b23afaaac2d84e7a33dc297e936ca7b5085d5 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 12 Nov 2023 16:13:54 +0100 Subject: [PATCH 61/70] feat(lander): store filename if provided --- include/lander.h | 21 +++++++--------- landerctl | 42 +++++++++++++++----------------- src/lander/lander.c | 46 ++++++++++++++++++++++++++++------- src/lander/lander_get.c | 2 ++ src/lander/lander_post.c | 4 +++ src/lander/lander_steps.c | 51 --------------------------------------- 6 files changed, 71 insertions(+), 95 deletions(-) diff --git a/include/lander.h b/include/lander.h index de89437..bff41fd 100644 --- a/include/lander.h +++ b/include/lander.h @@ -20,16 +20,9 @@ typedef enum lander_attr_type : uint8_t { lander_attr_type_entry_type = 0, lander_attr_type_content_type = 1, lander_attr_type_url = 2, + lander_attr_type_file_name = 3, } lander_attr_type; -typedef struct { - char *header; - lander_attr_type attr_type; - http_header header_type; -} header_to_attr; - -extern header_to_attr header_to_attrs[]; - typedef enum lander_entry_type : uint8_t { lander_entry_type_redirect = 0, lander_entry_type_paste = 1, @@ -67,11 +60,15 @@ bool lander_remove_entry(event_loop_conn *conn); bool lander_post_file(event_loop_conn *conn); /** - * Parse any custom headers and add them as attributes to the context's LSM - 
* entry + * Store the requested header as an attribute, if it's present. */ -bool lander_headers_to_attrs(event_loop_conn *conn); +void lander_header_to_attr(http_loop_ctx *ctx, char *header, + lander_attr_type attr_type); -bool lander_attrs_to_headers(event_loop_conn *conn); +/** + * Store the attribute's value as the provided header, if present. + */ +void lander_attr_to_header(http_loop_ctx *ctx, lander_attr_type attr_type, + http_header header_type); #endif diff --git a/landerctl b/landerctl index 74a7ed3..57ff78f 100755 --- a/landerctl +++ b/landerctl @@ -3,53 +3,49 @@ API_KEY=test URL=http://localhost:18080 +if [[ "$2" != '-' ]]; then + filename="$2" + content_type="$(file --mime-type --brief $2)" +fi + + if [ "$1" = g ]; then curl -is "$URL/$2" | sed -En 's/^[lL]ocation: (.*)/\1/p' -elif [ "$1" = s ]; then +elif [ "$1" = s ] || [ "$1" = sl ]; then curl \ + --fail \ -w "${URL}%header{location}" \ -XPOST \ -d "$2" \ -H "X-Api-Key: $API_KEY" \ - "$URL/s/$3" + "$URL/$1/$3" -elif [ "$1" = sl ]; then - curl \ - -w "${URL}%header{location}" \ - -XPOST \ - -d "$2" \ - -H "X-Api-Key: $API_KEY" \ - "$URL/sl/$3" - -elif [ "$1" = p ]; then +elif [ "$1" = p ] || [ "$1" = pl ]; then curl \ + --fail \ -w "${URL}%header{location}" \ -XPOST \ -H "X-Api-Key: $API_KEY" \ + -H "X-Lander-Filename: ${filename}" \ --data-binary @"$2" \ - "$URL/p/$3" + "$URL/$1/$3" -elif [ "$1" = pl ]; then +elif [ "$1" = f ] || [ "$1" = fl ]; then curl \ + --fail \ -w "${URL}%header{location}" \ -XPOST \ -H "X-Api-Key: $API_KEY" \ + -H "X-Lander-Content-Type: ${content_type}" \ + -H "X-Lander-Filename: ${filename}" \ --data-binary @"$2" \ - "$URL/pl/$3" - -elif [ "$1" = f ]; then - curl \ - -w "${URL}%header{location}" \ - -XPOST \ - -H "X-Api-Key: $API_KEY" \ - -H "X-Lander-Content-Type: $(file --mime-type --brief $2)" \ - --data-binary @"$2" \ - "$URL/f/$3" + "$URL/$1/$3" elif [ "$1" = d ]; then curl \ + --fail \ -XDELETE \ -H "X-Api-Key: $API_KEY" \ "$URL/$2" diff --git a/src/lander/lander.c 
b/src/lander/lander.c index 06395cb..442847a 100644 --- a/src/lander/lander.c +++ b/src/lander/lander.c @@ -1,4 +1,5 @@ #include +#include #include "http/types.h" #include "http_loop.h" @@ -19,7 +20,7 @@ http_route lander_routes[] = { .type = http_route_regex, .method = http_get, .path = "^/([^/]+)$", - .steps = {lander_get_entry, lander_attrs_to_headers, NULL}, + .steps = {lander_get_entry, NULL}, .steps_res = {http_loop_step_write_header, lander_stream_body_to_client, NULL}, }, @@ -52,18 +53,11 @@ http_route lander_routes[] = { .method = http_post, .path = "^/f(l?)/([^/]*)$", .steps = {http_loop_step_auth, http_loop_step_parse_content_length, - lander_post_file, lander_headers_to_attrs, - lander_stream_body_to_entry, NULL}, + lander_post_file, lander_stream_body_to_entry, NULL}, .steps_res = {http_loop_step_write_header, http_loop_step_write_body, NULL}}, }; -header_to_attr header_to_attrs[] = { - {"X-Lander-Content-Type", lander_attr_type_content_type, - http_header_content_type}, - {NULL, 0, 0}, -}; - void *lander_gctx_init() { return calloc(1, sizeof(lander_gctx)); } void *lander_ctx_init() { return calloc(1, sizeof(lander_ctx)); } @@ -77,3 +71,37 @@ void lander_ctx_reset(lander_ctx *ctx) { } void lander_ctx_free(lander_ctx *ctx) { free(ctx); } + +void lander_header_to_attr(http_loop_ctx *ctx, char *header_name, + lander_attr_type attr_type) { + lander_ctx *c_ctx = ctx->c; + + for (size_t i = 0; i < ctx->req.num_headers; i++) { + struct phr_header *header = &ctx->req.headers[i]; + + if (strncmp(header->name, header_name, header->name_len) == 0) { + if (header->value_len > 0) { + lsm_str *value; + lsm_str_init_copy_n(&value, (char *)header->value, header->value_len); + + lsm_entry_attr_insert(c_ctx->entry, attr_type, value); + } + + return; + } + } +} + +void lander_attr_to_header(http_loop_ctx *ctx, lander_attr_type attr_type, + http_header header_type) { + lander_ctx *c_ctx = ctx->c; + lsm_str *value; + + if (lsm_entry_attr_get(&value, c_ctx->entry, 
attr_type) == lsm_error_ok) { + char *buf = malloc(lsm_str_len(value) + 1); + memcpy(buf, lsm_str_ptr(value), lsm_str_len(value)); + buf[lsm_str_len(value)] = '\0'; + + http_res_add_header(&ctx->res, header_type, buf, true); + } +} diff --git a/src/lander/lander_get.c b/src/lander/lander_get.c index bd62a16..1fb1603 100644 --- a/src/lander/lander_get.c +++ b/src/lander/lander_get.c @@ -73,6 +73,8 @@ void lander_get_file(event_loop_conn *conn) { lander_ctx *c_ctx = ctx->c; ctx->res.body.expected_len = lsm_entry_data_len(c_ctx->entry); + lander_attr_to_header(ctx, lander_attr_type_content_type, + http_header_content_type); } bool lander_get_entry(event_loop_conn *conn) { diff --git a/src/lander/lander_post.c b/src/lander/lander_post.c index cdde6d0..b630373 100644 --- a/src/lander/lander_post.c +++ b/src/lander/lander_post.c @@ -104,6 +104,7 @@ bool lander_post_paste(event_loop_conn *conn) { lsm_entry_attr_insert_uint8_t(c_ctx->entry, lander_attr_type_entry_type, lander_entry_type_paste); + lander_header_to_attr(ctx, "X-Lander-Filename", lander_attr_type_file_name); return true; } @@ -119,6 +120,9 @@ bool lander_post_file(event_loop_conn *conn) { lsm_entry_attr_insert_uint8_t(c_ctx->entry, lander_attr_type_entry_type, lander_entry_type_file); + lander_header_to_attr(ctx, "X-Lander-Content-Type", + lander_attr_type_content_type); + lander_header_to_attr(ctx, "X-Lander-Filename", lander_attr_type_file_name); return true; } diff --git a/src/lander/lander_steps.c b/src/lander/lander_steps.c index c84f9d2..7804df5 100644 --- a/src/lander/lander_steps.c +++ b/src/lander/lander_steps.c @@ -20,54 +20,3 @@ bool lander_stream_body_to_entry(event_loop_conn *conn) { return lsm_entry_data_len(c_ctx->entry) == ctx->req.body.expected_len; } - -bool lander_headers_to_attrs(event_loop_conn *conn) { - http_loop_ctx *ctx = conn->ctx; - lander_ctx *c_ctx = ctx->c; - - for (size_t i = 0; i < ctx->req.num_headers; i++) { - struct phr_header *header = &ctx->req.headers[i]; - - int j = 0; - 
- while (header_to_attrs[j].header != NULL) { - if (strncmp(header->name, header_to_attrs[j].header, header->name_len) == - 0) { - lsm_str *value; - lsm_str_init_copy_n(&value, (char *)header->value, header->value_len); - - lsm_entry_attr_insert(c_ctx->entry, header_to_attrs[j].attr_type, - value); - - break; - } - - j++; - } - } - - return true; -} - -bool lander_attrs_to_headers(event_loop_conn *conn) { - http_loop_ctx *ctx = conn->ctx; - lander_ctx *c_ctx = ctx->c; - - int j = 0; - lsm_str *value; - - while (header_to_attrs[j].header != NULL) { - if (lsm_entry_attr_get(&value, c_ctx->entry, - header_to_attrs[j].attr_type) == lsm_error_ok) { - char *buf = malloc(lsm_str_len(value) + 1); - memcpy(buf, lsm_str_ptr(value), lsm_str_len(value)); - buf[lsm_str_len(value)] = '\0'; - - http_res_add_header(&ctx->res, header_to_attrs[j].header_type, buf, true); - } - - j++; - } - - return true; -} From b053aa6c9338c166fbe3f39c8fe84fcd5438b946 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sun, 12 Nov 2023 16:35:55 +0100 Subject: [PATCH 62/70] feat(lander): serve content-disposition header --- include/http/types.h | 3 ++- src/http/http_consts.c | 3 ++- src/lander/lander_get.c | 16 ++++++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/include/http/types.h b/include/http/types.h index 5b75bea..cccf0a0 100644 --- a/include/http/types.h +++ b/include/http/types.h @@ -124,7 +124,8 @@ extern const char *http_header_names[]; typedef enum http_header { http_header_connection = 0, http_header_location, - http_header_content_type + http_header_content_type, + http_header_content_disposition } http_header; typedef enum http_body_type { diff --git a/src/http/http_consts.c b/src/http/http_consts.c index 6a6e15c..8aa6f4b 100644 --- a/src/http/http_consts.c +++ b/src/http/http_consts.c @@ -96,7 +96,8 @@ const char *http_status_names[][32] = { const char *http_header_names[] = { "Connection", "Location", - "Content-Type" + "Content-Type", + 
"Content-Disposition" }; const char *http_mime_type_names[][2] = { diff --git a/src/lander/lander_get.c b/src/lander/lander_get.c index 1fb1603..102c631 100644 --- a/src/lander/lander_get.c +++ b/src/lander/lander_get.c @@ -1,6 +1,7 @@ #include #include "event_loop.h" +#include "http/res.h" #include "http/types.h" #include "lander.h" #include "log.h" @@ -75,6 +76,21 @@ void lander_get_file(event_loop_conn *conn) { ctx->res.body.expected_len = lsm_entry_data_len(c_ctx->entry); lander_attr_to_header(ctx, lander_attr_type_content_type, http_header_content_type); + + lsm_str *value; + char *buf; + + if (lsm_entry_attr_get(&value, c_ctx->entry, lander_attr_type_file_name) == + lsm_error_ok) { + buf = malloc(24 + lsm_str_len(value)); + int len = lsm_str_len(value); + sprintf(buf, "attachment; filename=\"%*s\"", len, lsm_str_ptr(value)); + } else { + buf = malloc(11); + strcpy(buf, "attachment"); + } + + http_res_add_header(&ctx->res, http_header_content_disposition, buf, true); } bool lander_get_entry(event_loop_conn *conn) { From 6af3e6ad6d85bb4bd9ba656481d7e940b8c0a656 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Tue, 14 Nov 2023 10:49:12 +0100 Subject: [PATCH 63/70] chore: integrate cppcheck into workflow --- Makefile | 20 +++++++++++++++++++- include/lander.h | 2 +- lsm/include/lsm/bt.h | 2 +- lsm/include/lsm/store.h | 2 +- lsm/include/lsm/str.h | 10 +++++----- lsm/include/lsm/trie.h | 2 +- lsm/src/bt/lsm_bt.c | 6 +----- lsm/src/store/lsm_store.c | 12 ++++-------- lsm/src/store/lsm_store_disk_read.c | 4 ++-- lsm/src/store/lsm_store_entry.c | 2 +- lsm/src/str/lsm_str.c | 10 +++++----- lsm/src/trie/lsm_trie.c | 16 +++++++--------- src/event_loop/event_loop.c | 27 +++++++-------------------- src/http_loop/http_loop.c | 6 +++--- src/http_loop/http_loop_steps.c | 6 +++--- src/lander/lander.c | 4 ++-- src/lander/lander_post.c | 2 +- 17 files changed, 64 insertions(+), 69 deletions(-) diff --git a/Makefile b/Makefile index 1dc0dce..c40c27a 100644 --- a/Makefile +++ 
b/Makefile @@ -43,7 +43,6 @@ libtrie: liblsm: $(MAKE) -C lsm -.PHONY: $(BIN) $(BIN): libtrie liblsm $(OBJS) $(CC) -o $@ $(OBJS) $(_LDFLAGS) @@ -55,6 +54,12 @@ $(BUILD_DIR)/$(THIRDPARTY_DIR)/%.c.o: $(THIRDPARTY_DIR)/%.c mkdir -p $(dir $@) $(CC) $(_CFLAGS) -c $< -o $@ +.PHONY: bin-docker +bin-docker: + docker build -t lander . + docker container create --name lander-temp lander + docker cp -q lander-temp:/bin/lander $(BUILD_DIR)/lander-docker + docker container rm lander-temp # =====TESTING===== .PHONY: run @@ -110,6 +115,19 @@ lint: fmt: clang-format -i $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) +.PHONY: check +check: + mkdir -p $(BUILD_DIR)/cppcheck + cppcheck \ + $(addprefix -I,$(INC_DIRS)) \ + --cppcheck-build-dir=$(BUILD_DIR)/cppcheck \ + --project=compile_commands.json \ + --error-exitcode=1 \ + --enable=warning,style \ + -ithirdparty/* \ + -itrie/* \ + --quiet + .PHONY: clean clean: rm -rf $(BUILD_DIR) diff --git a/include/lander.h b/include/lander.h index bff41fd..a30c32d 100644 --- a/include/lander.h +++ b/include/lander.h @@ -62,7 +62,7 @@ bool lander_post_file(event_loop_conn *conn); /** * Store the requested header as an attribute, if it's present. */ -void lander_header_to_attr(http_loop_ctx *ctx, char *header, +void lander_header_to_attr(http_loop_ctx *ctx, const char *header, lander_attr_type attr_type); /** diff --git a/lsm/include/lsm/bt.h b/lsm/include/lsm/bt.h index 2e30ae5..d7fe497 100644 --- a/lsm/include/lsm/bt.h +++ b/lsm/include/lsm/bt.h @@ -28,7 +28,7 @@ void lsm_bt_clear(lsm_bt *bt); /** * Return the size of the binary tree */ -uint64_t lsm_bt_size(lsm_bt *bt); +uint64_t lsm_bt_size(const lsm_bt *bt); /** * Search for the data stored behind the given key. 
diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index 45eda7e..82785a5 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -117,7 +117,7 @@ lsm_error lsm_store_init(lsm_store **ptr); * @param store store to use * @return how many elements are in the store */ -uint64_t lsm_store_size(lsm_store *store); +uint64_t lsm_store_size(const lsm_store *store); /** * Open the given database file and load it into a new store object. diff --git a/lsm/include/lsm/str.h b/lsm/include/lsm/str.h index 01f2651..2866f4f 100644 --- a/lsm/include/lsm/str.h +++ b/lsm/include/lsm/str.h @@ -34,7 +34,7 @@ lsm_error lsm_str_init_zero(lsm_str **ptr); * @param ptr pointer to store newly allocated pointer * @param s string to copy into lsm string */ -lsm_error lsm_str_init_copy(lsm_str **ptr, char *s); +lsm_error lsm_str_init_copy(lsm_str **ptr, const char *s); /** * Same as `lsm_str_init_copy`, except that it takes an additional argument @@ -45,7 +45,7 @@ lsm_error lsm_str_init_copy(lsm_str **ptr, char *s); * @param s string to copy into lsm string * @param len length of string to copy */ -lsm_error lsm_str_init_copy_n(lsm_str **ptr, char *s, uint64_t len); +lsm_error lsm_str_init_copy_n(lsm_str **ptr, const char *s, uint64_t len); /** * Overwrite an existing lsm_str so it now represents the new provided string. 
@@ -65,7 +65,7 @@ void lsm_str_overwrite(lsm_str *str, char *s); * @param str lsm_str object to modify * @param s string to copy into lsm string */ -lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s); +lsm_error lsm_str_overwrite_copy(lsm_str *str, const char *s); /** * Same as `lsm_str_overwrite_copy`, except the length is explicitely specified, @@ -75,7 +75,7 @@ lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s); * @param s string to copy into lsm string * @param len length of the string to copy */ -lsm_error lsm_str_overwrite_copy_n(lsm_str *str, char *s, uint64_t len); +lsm_error lsm_str_overwrite_copy_n(lsm_str *str, const char *s, uint64_t len); /** * Deallocate the existing internal string if needed and replace the lsm_str @@ -99,7 +99,7 @@ void lsm_str_free(lsm_str *str); * * @param str string to return length for. */ -uint64_t lsm_str_len(lsm_str *str); +uint64_t lsm_str_len(const lsm_str *str); /** * Return a pointer to the string's underlying char array. Note that this array diff --git a/lsm/include/lsm/trie.h b/lsm/include/lsm/trie.h index 7fd6b5b..6801890 100644 --- a/lsm/include/lsm/trie.h +++ b/lsm/include/lsm/trie.h @@ -55,6 +55,6 @@ lsm_error lsm_trie_remove(void **data, lsm_trie *trie, lsm_str *key); * * @param trie trie to return size for */ -uint64_t lsm_trie_size(lsm_trie *trie); +uint64_t lsm_trie_size(const lsm_trie *trie); #endif diff --git a/lsm/src/bt/lsm_bt.c b/lsm/src/bt/lsm_bt.c index 69fa895..6bfd435 100644 --- a/lsm/src/bt/lsm_bt.c +++ b/lsm/src/bt/lsm_bt.c @@ -57,7 +57,7 @@ void lsm_bt_free(lsm_bt *bt) { free(bt); } -uint64_t lsm_bt_size(lsm_bt *bt) { return bt->size; } +uint64_t lsm_bt_size(const lsm_bt *bt) { return bt->size; } lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data) { lsm_bt_node **dest = &bt->root; @@ -102,10 +102,6 @@ lsm_error lsm_bt_search(void **out, lsm_bt *bt, char key) { } lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key) { - if (bt->root == NULL) { - return lsm_error_not_found; - } - 
lsm_bt_node **dest = &bt->root; while ((*dest != NULL) && ((*dest)->key != key)) { diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index f93e289..460350c 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -30,7 +30,7 @@ lsm_error lsm_store_init(lsm_store **ptr) { return lsm_error_ok; } -uint64_t lsm_store_size(lsm_store *store) { return lsm_trie_size(store->trie); } +uint64_t lsm_store_size(const lsm_store *store) { return lsm_trie_size(store->trie); } lsm_error lsm_store_open_read(lsm_entry_handle **out, lsm_store *store, lsm_str *key) { @@ -43,11 +43,9 @@ lsm_error lsm_store_open_read(lsm_entry_handle **out, lsm_store *store, return lsm_error_lock_busy; } - lsm_entry *entry = wrapper->entry; - // While the trie's data field will never be NULL, the actual entry pointer // might be - if (entry == NULL) { + if (wrapper->entry == NULL) { pthread_rwlock_unlock(&wrapper->lock); return lsm_error_not_found; @@ -81,11 +79,9 @@ lsm_error lsm_store_open_write(lsm_entry_handle **out, lsm_store *store, return lsm_error_lock_busy; } - lsm_entry *entry = wrapper->entry; - // While the trie's data field will never be NULL, the actual entry pointer // might be - if (entry == NULL) { + if (wrapper->entry == NULL) { pthread_rwlock_unlock(&wrapper->lock); return lsm_error_not_found; @@ -202,7 +198,7 @@ lsm_error lsm_entry_data_append(lsm_entry_handle *handle, lsm_str *data) { lsm_error lsm_entry_data_read(uint64_t *out, char *buf, lsm_entry_handle *handle, uint64_t len) { - lsm_entry *entry = handle->wrapper->entry; + const lsm_entry *entry = handle->wrapper->entry; if (entry->data_len == 0) { *out = 0; diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c index 72e34bd..b3e057a 100644 --- a/lsm/src/store/lsm_store_disk_read.c +++ b/lsm/src/store/lsm_store_disk_read.c @@ -27,7 +27,7 @@ lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { fclose(db_file); - FILE *db_file = fopen(db_file_path, 
"r+b"); + db_file = fopen(db_file_path, "r+b"); if (db_file == NULL) { return lsm_error_failed_io; @@ -62,7 +62,7 @@ lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { fclose(idx_file); // If opening it in extended read mode still fails now, there's a problem - FILE *idx_file = fopen(idx_file_path, "r+b"); + idx_file = fopen(idx_file_path, "r+b"); if (idx_file == NULL) { return lsm_error_failed_io; diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index a9c0ade..04a4710 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -107,7 +107,7 @@ lsm_error lsm_entry_attr_get_uint64_t(uint64_t *out, lsm_entry_handle *handle, LSM_RES(lsm_entry_attr_get(&s, handle, type)); - uint64_t num; + uint64_t num = 0; for (uint8_t i = 0; i < sizeof(uint64_t) / sizeof(char); i++) { ((char *)&num)[i] = lsm_str_char(s, i); diff --git a/lsm/src/str/lsm_str.c b/lsm/src/str/lsm_str.c index 0cfd571..393bbd2 100644 --- a/lsm/src/str/lsm_str.c +++ b/lsm/src/str/lsm_str.c @@ -33,7 +33,7 @@ lsm_error lsm_str_init_zero(lsm_str **ptr) { return lsm_error_ok; } -lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) { +lsm_error lsm_str_init_copy(lsm_str **ptr, const char *s) { lsm_str *str = calloc(1, sizeof(lsm_str)); if (str == NULL) { @@ -47,7 +47,7 @@ lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) { return lsm_error_ok; } -lsm_error lsm_str_init_copy_n(lsm_str **ptr, char *s, uint64_t len) { +lsm_error lsm_str_init_copy_n(lsm_str **ptr, const char *s, uint64_t len) { lsm_str *str = calloc(1, sizeof(lsm_str)); if (str == NULL) { @@ -72,11 +72,11 @@ void lsm_str_overwrite(lsm_str *str, char *s) { } } -lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s) { +lsm_error lsm_str_overwrite_copy(lsm_str *str, const char *s) { return lsm_str_overwrite_copy_n(str, s, strlen(s)); } -lsm_error lsm_str_overwrite_copy_n(lsm_str *str, char *s, uint64_t len) { +lsm_error lsm_str_overwrite_copy_n(lsm_str *str, const char *s, uint64_t 
len) { if (len <= 8) { memcpy(str->data.val, s, len); } else { @@ -108,7 +108,7 @@ void lsm_str_free(lsm_str *str) { free(str); } -uint64_t lsm_str_len(lsm_str *str) { return str->len; } +uint64_t lsm_str_len(const lsm_str *str) { return str->len; } const char *lsm_str_ptr(lsm_str *str) { if (str->len <= 8) { diff --git a/lsm/src/trie/lsm_trie.c b/lsm/src/trie/lsm_trie.c index 0e5b548..eb13ec4 100644 --- a/lsm/src/trie/lsm_trie.c +++ b/lsm/src/trie/lsm_trie.c @@ -32,6 +32,8 @@ lsm_error lsm_trie_init(lsm_trie **ptr) { lsm_error res = lsm_trie_node_init(&root); if (res != lsm_error_ok) { + free(trie); + return res; } @@ -41,7 +43,7 @@ lsm_error lsm_trie_init(lsm_trie **ptr) { return lsm_error_ok; } -uint64_t lsm_trie_size(lsm_trie *trie) { return trie->size; } +uint64_t lsm_trie_size(const lsm_trie *trie) { return trie->size; } lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { // NULL is not allowed as a data value, as it's used to indicate a lack of @@ -67,11 +69,10 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { uint64_t index = 0; lsm_trie_node *node = trie->root; lsm_trie_node *next_node; - lsm_error res; while (index < key_len) { char c = lsm_str_char(key, index); - res = lsm_bt_search((void **)&next_node, &node->bt, c); + lsm_error res = lsm_bt_search((void **)&next_node, &node->bt, c); // No child is present yet for this character, so we can insert the string // here @@ -165,11 +166,10 @@ lsm_error lsm_trie_search(void **out, lsm_trie *trie, lsm_str *key) { uint64_t index = 0; lsm_trie_node *node = trie->root; lsm_trie_node *next_node; - lsm_error res; while (index < key_len) { char c = lsm_str_char(key, index); - res = lsm_bt_search((void **)&next_node, &node->bt, c); + lsm_error res = lsm_bt_search((void **)&next_node, &node->bt, c); if (res != lsm_error_ok) { return res; @@ -220,12 +220,10 @@ lsm_error lsm_trie_remove(void **data, lsm_trie *trie, lsm_str *key) { uint64_t index = 0; lsm_trie_node *parent = 
trie->root; lsm_trie_node *child; - lsm_error res; - char c; while (index < key_len) { - c = lsm_str_char(key, index); - res = lsm_bt_search((void **)&child, &parent->bt, c); + char c = lsm_str_char(key, index); + lsm_error res = lsm_bt_search((void **)&child, &parent->bt, c); if (res != lsm_error_ok) { return res; diff --git a/src/event_loop/event_loop.c b/src/event_loop/event_loop.c index 53317bc..8129223 100644 --- a/src/event_loop/event_loop.c +++ b/src/event_loop/event_loop.c @@ -13,14 +13,12 @@ #include "event_loop.h" #include "log.h" -static int event_loop_fd_set_nb(int fd) { +static void event_loop_fd_set_nb(int fd) { int flags = fcntl(fd, F_GETFL); flags |= O_NONBLOCK; fcntl(fd, F_SETFL, flags); - - return 0; } event_loop *event_loop_init() { @@ -61,13 +59,7 @@ int event_loop_accept(event_loop *el, int fd) { } // set the new connection fd to nonblocking mode - int res = event_loop_fd_set_nb(connfd); - - if (res < 0) { - close(connfd); - - return -2; - } + event_loop_fd_set_nb(connfd); // creating the struct Conn event_loop_conn *conn = event_loop_conn_init(el); @@ -82,7 +74,7 @@ int event_loop_accept(event_loop *el, int fd) { conn->fd = connfd; conn->state = event_loop_conn_state_req; - res = event_loop_put(el, conn); + int res = event_loop_put(el, conn); if (res != 0) { close(connfd); @@ -126,16 +118,10 @@ void event_loop_run(event_loop *el, int port) { } // The listening socket is always poll'ed in non-blocking mode as well - res = event_loop_fd_set_nb(fd); - - if (res != 0) { - critical(1, "Failed to set listening socket to non-blocking, errno: %i", - errno); - } + event_loop_fd_set_nb(fd); // TODO don't hardcode the number 32 struct pollfd *poll_args = calloc(sizeof(struct pollfd), 32); - size_t poll_args_count; // for convenience, the listening fd is put in the first position struct pollfd pfd = {fd, POLLIN, 0}; @@ -147,7 +133,7 @@ void event_loop_run(event_loop *el, int port) { info("Starting event loop on port %i", port); while (1) { - 
poll_args_count = 1; + size_t poll_args_count = 1; // connection fds for (size_t i = 0; i < el->connection_count; i++) { @@ -160,7 +146,8 @@ void event_loop_run(event_loop *el, int port) { events = (conn->state == event_loop_conn_state_req) ? POLLIN : POLLOUT; events |= POLLERR; - struct pollfd pfd = {conn->fd, events, 0}; + pfd.fd = conn->fd; + pfd.events = events; poll_args[poll_args_count] = pfd; poll_args_count++; diff --git a/src/http_loop/http_loop.c b/src/http_loop/http_loop.c index 964992d..cb4289e 100644 --- a/src/http_loop/http_loop.c +++ b/src/http_loop/http_loop.c @@ -72,17 +72,17 @@ event_loop *http_loop_init(http_route *routes, size_t route_count, } void http_loop_set_api_key(http_loop *hl, const char *api_key) { - ((http_loop_gctx *)hl->gctx)->api_key = api_key; + http_loop_gctx *gctx = hl->gctx; + gctx->api_key = api_key; } void http_loop_run(event_loop *el, int port) { debug("Compiling RegEx routes"); http_loop_gctx *gctx = el->gctx; - http_route *route; for (size_t i = 0; i < gctx->route_count; i++) { - route = &gctx->routes[i]; + http_route *route = &gctx->routes[i]; if (route->type == http_route_regex) { regex_t *r = calloc(sizeof(regex_t), 1); diff --git a/src/http_loop/http_loop_steps.c b/src/http_loop/http_loop_steps.c index bcfeae7..c3c9c0a 100644 --- a/src/http_loop/http_loop_steps.c +++ b/src/http_loop/http_loop_steps.c @@ -28,7 +28,7 @@ bool http_loop_step_parse_content_length(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; for (size_t i = 0; i < ctx->req.num_headers; i++) { - struct phr_header *header = &ctx->req.headers[i]; + const struct phr_header *header = &ctx->req.headers[i]; if (strncmp(header->name, "Content-Length", header->name_len) == 0) { // If the content length header is present but contains an invalid @@ -65,7 +65,7 @@ bool try_parse_content_length(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; for (size_t i = 0; i < ctx->req.num_headers; i++) { - struct phr_header *header = &ctx->req.headers[i]; + const 
struct phr_header *header = &ctx->req.headers[i]; if (strncmp(header->name, "Content-Length", header->name_len) == 0) { // If the content length header is present but contains an invalid @@ -145,7 +145,7 @@ bool http_loop_step_auth(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; for (size_t i = 0; i < ctx->req.num_headers; i++) { - struct phr_header *header = &ctx->req.headers[i]; + const struct phr_header *header = &ctx->req.headers[i]; if ((strncmp("X-Api-Key", header->name, header->name_len) == 0) && (strncmp(header->value, ctx->g->api_key, header->value_len) == 0) && diff --git a/src/lander/lander.c b/src/lander/lander.c index 442847a..5d1c1fe 100644 --- a/src/lander/lander.c +++ b/src/lander/lander.c @@ -72,12 +72,12 @@ void lander_ctx_reset(lander_ctx *ctx) { void lander_ctx_free(lander_ctx *ctx) { free(ctx); } -void lander_header_to_attr(http_loop_ctx *ctx, char *header_name, +void lander_header_to_attr(http_loop_ctx *ctx, const char *header_name, lander_attr_type attr_type) { lander_ctx *c_ctx = ctx->c; for (size_t i = 0; i < ctx->req.num_headers; i++) { - struct phr_header *header = &ctx->req.headers[i]; + const struct phr_header *header = &ctx->req.headers[i]; if (strncmp(header->name, header_name, header->name_len) == 0) { if (header->value_len > 0) { diff --git a/src/lander/lander_post.c b/src/lander/lander_post.c index b630373..9711d03 100644 --- a/src/lander/lander_post.c +++ b/src/lander/lander_post.c @@ -37,7 +37,7 @@ bool lander_insert_entry(http_loop_ctx *ctx) { randomize_key(key_s, key_len); lsm_str_init(&key, key_s); } else { - char *key_s = (char *)&ctx->req.path[ctx->req.regex_groups[2].rm_so]; + const char *key_s = &ctx->req.path[ctx->req.regex_groups[2].rm_so]; key_len = ctx->req.regex_groups[2].rm_eo - ctx->req.regex_groups[2].rm_so; lsm_str_init_copy_n(&key, key_s, key_len); From 29f4edc059bd654f666b56710aba95f2672389d3 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Tue, 14 Nov 2023 15:34:07 +0100 Subject: [PATCH 64/70] 
chore(lander): fix Docker build --- .dockerignore | 19 +++++++++++++++---- Dockerfile | 13 +++++++------ Makefile | 12 +++++------- config.mk | 4 ++-- 4 files changed, 29 insertions(+), 19 deletions(-) diff --git a/.dockerignore b/.dockerignore index 4570fd8..9bda86a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,15 @@ -data/ -build/ -trie/build/ -.git/ +* + +!src/ +!include/ + +!lsm/src/ +!lsm/include/ +!lsm/Makefile +!lsm/config.mk + +!thirdparty/include +!thirdparty/src + +!Makefile +!config.mk diff --git a/Dockerfile b/Dockerfile index 2b7b000..dd4eb42 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,12 @@ -FROM alpine:3.18.0 AS builder +FROM ubuntu:23.10 AS builder ARG DI_VER=1.2.5 -RUN apk add --update --no-cache \ - build-base \ - make \ - curl +RUN apt update && \ + apt install -y --no-install-recommends \ + curl ca-certificates \ + build-essential \ + musl musl-dev musl-tools WORKDIR /app @@ -18,7 +19,7 @@ RUN curl -Lo - "https://github.com/Yelp/dumb-init/archive/refs/tags/v${DI_VER}.t COPY . 
./ -RUN make CFLAGS='-O3' LDFLAGS='-static -flto' && \ +RUN make CFLAGS='-O3' LDFLAGS='-flto -static' && \ strip build/lander && \ readelf -d build/lander && \ [ "$(readelf -d build/lander | grep NEEDED | wc -l)" = 0 ] diff --git a/Makefile b/Makefile index c40c27a..63c35f2 100644 --- a/Makefile +++ b/Makefile @@ -35,15 +35,11 @@ all: $(BIN) .PHONY: objs objs: $(OBJS) -.PHONY: libtrie -libtrie: - $(MAKE) -C trie - .PHONY: liblsm liblsm: $(MAKE) -C lsm -$(BIN): libtrie liblsm $(OBJS) +$(BIN): liblsm $(OBJS) $(CC) -o $@ $(OBJS) $(_LDFLAGS) $(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c @@ -126,12 +122,14 @@ check: --enable=warning,style \ -ithirdparty/* \ -itrie/* \ - --quiet + --inline-suppr \ + --check-level=exhaustive \ + --quiet \ + -j$(shell nproc) .PHONY: clean clean: rm -rf $(BUILD_DIR) - $(MAKE) -C trie clean $(MAKE) -C lsm clean diff --git a/config.mk b/config.mk index 8336cd1..9b22a21 100644 --- a/config.mk +++ b/config.mk @@ -8,8 +8,8 @@ TEST_DIR = test THIRDPARTY_DIR = thirdparty INC_DIRS = include $(THIRDPARTY_DIR)/include trie/include lsm/include -LIBS = trie m lsm -LIB_DIRS = ./trie/build ./lsm/build +LIBS = m lsm +LIB_DIRS = ./lsm/build # -MMD: generate a .d file for every source file. 
This file can be imported by # make and makes make aware that a header file has been changed, ensuring an From f97de2fe832fd82ddb06eb9ceb63f8595351eeb3 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Tue, 14 Nov 2023 15:36:18 +0100 Subject: [PATCH 65/70] fix(event_loop): fix some wrong allocs --- landerctl | 13 +++++++------ lsm/src/store/lsm_store.c | 4 +++- src/event_loop/event_loop.c | 4 ++-- src/http_loop/http_loop_res.c | 1 + src/http_loop/http_loop_steps.c | 2 +- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/landerctl b/landerctl index 57ff78f..28f586f 100755 --- a/landerctl +++ b/landerctl @@ -10,11 +10,11 @@ fi if [ "$1" = g ]; then - curl -is "$URL/$2" | + exec curl -is "$URL/$2" | sed -En 's/^[lL]ocation: (.*)/\1/p' elif [ "$1" = s ] || [ "$1" = sl ]; then - curl \ + exec curl \ --fail \ -w "${URL}%header{location}" \ -XPOST \ @@ -23,7 +23,7 @@ elif [ "$1" = s ] || [ "$1" = sl ]; then "$URL/$1/$3" elif [ "$1" = p ] || [ "$1" = pl ]; then - curl \ + exec curl \ --fail \ -w "${URL}%header{location}" \ -XPOST \ @@ -33,18 +33,19 @@ elif [ "$1" = p ] || [ "$1" = pl ]; then "$URL/$1/$3" elif [ "$1" = f ] || [ "$1" = fl ]; then - curl \ + exec curl \ --fail \ + -v \ -w "${URL}%header{location}" \ -XPOST \ -H "X-Api-Key: $API_KEY" \ -H "X-Lander-Content-Type: ${content_type}" \ -H "X-Lander-Filename: ${filename}" \ - --data-binary @"$2" \ + -T "$2" \ "$URL/$1/$3" elif [ "$1" = d ]; then - curl \ + exec curl \ --fail \ -XDELETE \ -H "X-Api-Key: $API_KEY" \ diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c index 460350c..8be426b 100644 --- a/lsm/src/store/lsm_store.c +++ b/lsm/src/store/lsm_store.c @@ -30,7 +30,9 @@ lsm_error lsm_store_init(lsm_store **ptr) { return lsm_error_ok; } -uint64_t lsm_store_size(const lsm_store *store) { return lsm_trie_size(store->trie); } +uint64_t lsm_store_size(const lsm_store *store) { + return lsm_trie_size(store->trie); +} lsm_error lsm_store_open_read(lsm_entry_handle **out, lsm_store *store, 
lsm_str *key) { diff --git a/src/event_loop/event_loop.c b/src/event_loop/event_loop.c index 8129223..a01ca37 100644 --- a/src/event_loop/event_loop.c +++ b/src/event_loop/event_loop.c @@ -25,7 +25,7 @@ event_loop *event_loop_init() { event_loop *el = calloc(sizeof(event_loop), 1); // No idea if this is a good starter value - el->connections = calloc(sizeof(event_loop_conn), 16); + el->connections = calloc(sizeof(event_loop_conn *), 16); el->connection_count = 16; return el; @@ -34,7 +34,7 @@ event_loop *event_loop_init() { int event_loop_put(event_loop *el, event_loop_conn *conn) { if ((size_t)conn->fd >= el->connection_count) { event_loop_conn **resized = - realloc(el->connections, sizeof(event_loop_conn) * (conn->fd + 1)); + realloc(el->connections, sizeof(event_loop_conn *) * (conn->fd + 1)); if (resized == NULL) { return -1; diff --git a/src/http_loop/http_loop_res.c b/src/http_loop/http_loop_res.c index fec7cd9..b29550f 100644 --- a/src/http_loop/http_loop_res.c +++ b/src/http_loop/http_loop_res.c @@ -1,6 +1,7 @@ #include "http_loop.h" #include "log.h" +// cppcheck-suppress syntaxError static const char *http_response_format = "HTTP/1.1 %i %s\n" "Server: lander/" LANDER_VERSION "\n" "Content-Length: %lu\n"; diff --git a/src/http_loop/http_loop_steps.c b/src/http_loop/http_loop_steps.c index c3c9c0a..c3ff36a 100644 --- a/src/http_loop/http_loop_steps.c +++ b/src/http_loop/http_loop_steps.c @@ -104,7 +104,7 @@ bool http_loop_step_body_to_buf(event_loop_conn *conn) { } ctx->req.body.type = http_body_buf; - ctx->req.body.buf = malloc(ctx->req.body.expected_len * sizeof(uint8_t)); + ctx->req.body.buf = malloc(ctx->req.body.expected_len * sizeof(char)); ctx->req.body.len = 0; } From 13b20715bfff3981b9cf8e03658b9a68e7cae776 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Tue, 14 Nov 2023 20:34:01 +0100 Subject: [PATCH 66/70] fix(http_loop): correctly parse content-type --- src/http_loop/http_loop_steps.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 
deletion(-) diff --git a/src/http_loop/http_loop_steps.c b/src/http_loop/http_loop_steps.c index c3ff36a..99c5cce 100644 --- a/src/http_loop/http_loop_steps.c +++ b/src/http_loop/http_loop_steps.c @@ -4,6 +4,18 @@ #include "http_loop.h" #include "lander.h" +// Just a naive pow implementation; might improve later +static uint64_t ipow(uint64_t base, uint64_t power) { + uint64_t res = 1; + + while (power > 0) { + res *= base; + power--; + } + + return res; +} + /* * Converts a string to a number, returning true if the string contained a valid * positive number. @@ -18,7 +30,7 @@ static bool string_to_num(size_t *res, const char *s, size_t len) { return false; } - *res += val * (int)pow(10, (len - 1) - i); + *res += (uint64_t)val * ipow(10, (len - 1) - i); } return true; From 49c4c782421cc35c54a6be5a6d362048fdf94334 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Wed, 15 Nov 2023 09:35:59 +0100 Subject: [PATCH 67/70] feat(landerctl): started custom cli tool; wrote config parser --- landerctl | 53 --------------- landerctl/.landerrc | 1 + landerctl/Makefile | 120 ++++++++++++++++++++++++++++++++++ landerctl/config.mk | 22 +++++++ landerctl/include/landerctl.h | 23 +++++++ landerctl/src/cfg_parse.c | 59 +++++++++++++++++ landerctl/src/main.c | 87 ++++++++++++++++++++++++ 7 files changed, 312 insertions(+), 53 deletions(-) delete mode 100755 landerctl create mode 100644 landerctl/.landerrc create mode 100644 landerctl/Makefile create mode 100644 landerctl/config.mk create mode 100644 landerctl/include/landerctl.h create mode 100644 landerctl/src/cfg_parse.c create mode 100644 landerctl/src/main.c diff --git a/landerctl b/landerctl deleted file mode 100755 index 28f586f..0000000 --- a/landerctl +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env sh - -API_KEY=test -URL=http://localhost:18080 - -if [[ "$2" != '-' ]]; then - filename="$2" - content_type="$(file --mime-type --brief $2)" -fi - - -if [ "$1" = g ]; then - exec curl -is "$URL/$2" | - sed -En 's/^[lL]ocation: 
(.*)/\1/p' - -elif [ "$1" = s ] || [ "$1" = sl ]; then - exec curl \ - --fail \ - -w "${URL}%header{location}" \ - -XPOST \ - -d "$2" \ - -H "X-Api-Key: $API_KEY" \ - "$URL/$1/$3" - -elif [ "$1" = p ] || [ "$1" = pl ]; then - exec curl \ - --fail \ - -w "${URL}%header{location}" \ - -XPOST \ - -H "X-Api-Key: $API_KEY" \ - -H "X-Lander-Filename: ${filename}" \ - --data-binary @"$2" \ - "$URL/$1/$3" - -elif [ "$1" = f ] || [ "$1" = fl ]; then - exec curl \ - --fail \ - -v \ - -w "${URL}%header{location}" \ - -XPOST \ - -H "X-Api-Key: $API_KEY" \ - -H "X-Lander-Content-Type: ${content_type}" \ - -H "X-Lander-Filename: ${filename}" \ - -T "$2" \ - "$URL/$1/$3" - -elif [ "$1" = d ]; then - exec curl \ - --fail \ - -XDELETE \ - -H "X-Api-Key: $API_KEY" \ - "$URL/$2" -fi diff --git a/landerctl/.landerrc b/landerctl/.landerrc new file mode 100644 index 0000000..db525c7 --- /dev/null +++ b/landerctl/.landerrc @@ -0,0 +1 @@ +api_key = test diff --git a/landerctl/Makefile b/landerctl/Makefile new file mode 100644 index 0000000..72b8239 --- /dev/null +++ b/landerctl/Makefile @@ -0,0 +1,120 @@ +# https://spin.atomicobject.com/2016/08/26/makefile-c-projects/ was a great +# base for this Makefile + +-include config.mk + +export CFLAGS +export LDFLAGS + +BIN := $(BUILD_DIR)/$(BIN_FILENAME) + +SRCS != find '$(SRC_DIR)' -iname '*.c' + +SRCS_H != find include -iname '*.h' + +OBJS := $(SRCS:%=$(BUILD_DIR)/%.o) $(SRCS_THIRDPARTY:%=$(BUILD_DIR)/%.o) +DEPS := $(SRCS:%=$(BUILD_DIR)/%.d) + +_CFLAGS := $(addprefix -I,$(INC_DIRS)) $(CFLAGS) -Wall -Wextra -DLANDER_VERSION=\"$(VERSION)\" +_LDFLAGS := $(addprefix -L,$(LIB_DIRS)) $(addprefix -l,$(LIBS)) $(LDFLAGS) + +.PHONY: all +all: $(BIN) + + +# =====COMPILATION===== +# Utility used by the CI to lint +.PHONY: objs +objs: $(OBJS) + +$(BIN): $(OBJS) + $(CC) -o $@ $(OBJS) $(_LDFLAGS) + +$(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c + mkdir -p $(dir $@) + $(CC) $(_CFLAGS) -c $< -o $@ + +$(BUILD_DIR)/$(THIRDPARTY_DIR)/%.c.o: $(THIRDPARTY_DIR)/%.c 
+ mkdir -p $(dir $@) + $(CC) $(_CFLAGS) -c $< -o $@ + +# =====TESTING===== +.PHONY: run +run: $(BIN) + LANDER_API_KEY=test \ + LANDER_DATA_DIR=data \ + '$(BUILD_DIR)/$(BIN_FILENAME)' + +.PHONY: valgrind +valgrind: $(BIN) + LANDER_API_KEY=test \ + LANDER_DATA_DIR=data \ + valgrind '$(BUILD_DIR)/$(BIN_FILENAME)' + +.PHONY: test +test: $(TARGETS_TEST) + +.PHONY: test-mem +test-mem: $(TARGETS_MEM_TEST) + +.PHONY: $(TARGETS_TEST) +$(TARGETS_TEST): test-%: % + ./$^ + +.PHONY: $(TARGETS_MEM_TEST) +$(TARGETS_MEM_TEST): test-mem-%: % + valgrind --tool=memcheck --error-exitcode=1 --track-origins=yes --leak-check=full ./$^ + +.PHONY: build-test +build-test: $(BINS_TEST) + +$(BINS_TEST): %: %.c.o + $(CC) \ + $^ -o $@ + +# Along with the include directory, each test includes $(TEST_DIR) (which +# contains the acutest.h header file), and the src directory of the module it's +# testing. This allows tests to access internal methods, which aren't publicly +# exposed. +$(BUILD_DIR)/$(TEST_DIR)/%.c.o: $(TEST_DIR)/%.c + mkdir -p $(dir $@) + $(CC) $(_CFLAGS) -I$(TEST_DIR) \ + -I$(dir $(@:$(BUILD_DIR)/$(TEST_DIR)/%=$(SRC_DIR)/%)) \ + -c $< -o $@ + + +# =====MAINTENANCE===== +.PHONY: lint +lint: + clang-format -n --Werror $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) + +.PHONY: fmt +fmt: + clang-format -i $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) + +.PHONY: check +check: + mkdir -p $(BUILD_DIR)/cppcheck + cppcheck \ + $(addprefix -I,$(INC_DIRS)) \ + --cppcheck-build-dir=$(BUILD_DIR)/cppcheck \ + --project=compile_commands.json \ + --error-exitcode=1 \ + --enable=warning,style \ + --inline-suppr \ + --check-level=exhaustive \ + --quiet \ + -j$(shell nproc) + +.PHONY: clean +clean: + rm -rf $(BUILD_DIR) + +.PHONY: bear +bear: clean + bear -- make + bear --append -- make build-test + + +# Make make aware of the .d files +-include $(DEPS) diff --git a/landerctl/config.mk b/landerctl/config.mk new file mode 100644 index 0000000..611faaf --- /dev/null +++ b/landerctl/config.mk @@ -0,0 +1,22 @@ +VERSION := 
0.2.0 + +BIN_FILENAME = landerctl + +BUILD_DIR = build +SRC_DIR = src +TEST_DIR = test + +INC_DIRS = include +LIBS = magic curl +LIB_DIRS = + +# -MMD: generate a .d file for every source file. This file can be imported by +# make and makes make aware that a header file has been changed, ensuring an +# object file is also recompiled if only a header is changed. +# -MP: generate a dummy target for every header file (according to the docs it +# prevents some errors when removing header files) +CFLAGS = -MMD -MP -g + +# When compiling release builds, these flags are better +# CLAGS = -O3 +# LDFLAGS = -flto diff --git a/landerctl/include/landerctl.h b/landerctl/include/landerctl.h new file mode 100644 index 0000000..59a6301 --- /dev/null +++ b/landerctl/include/landerctl.h @@ -0,0 +1,23 @@ +#ifndef LANDERCTL +#define LANDERCTL + +typedef struct landerctl_cfg { + const char *api_key; +} landerctl_cfg; + +typedef enum landerctl_cfg_err { + landerctl_cfg_err_ok = 0, + landerctl_cfg_err_not_found, + landerctl_cfg_err_invalid, + landerctl_cfg_err_incomplete, +} landerctl_cfg_err; + +/** + * Try to parse the required config arguments from the config file + * + * @param out config to write values to. 
Existing values are overwritten + * @param path path to config file + */ +landerctl_cfg_err landerctl_cfg_parse(landerctl_cfg *out, const char *path); + +#endif diff --git a/landerctl/src/cfg_parse.c b/landerctl/src/cfg_parse.c new file mode 100644 index 0000000..4a681dc --- /dev/null +++ b/landerctl/src/cfg_parse.c @@ -0,0 +1,59 @@ +#include +#include +#include +#include + +#include "landerctl.h" + +static const char cfg_line_regex_expr[] = "^([^ ]+) *= *([^ ]+)$"; + +landerctl_cfg_err landerctl_cfg_parse(landerctl_cfg *out, const char *path) { + FILE *f = fopen(path, "r"); + + if (f == NULL) { + return landerctl_cfg_err_not_found; + } + + regex_t cfg_line_regex; + regcomp(&cfg_line_regex, cfg_line_regex_expr, REG_EXTENDED); + + // Accept lines of at most 256 lines + char line[256]; + landerctl_cfg_err res = landerctl_cfg_err_incomplete; + + while (fgets(line, sizeof(line), f) != NULL) { + // Last character might be a newline + size_t len = strlen(line); + + if (line[len - 1] == '\n') { + line[len - 1] = '\0'; + } + + regmatch_t reg_groups[3]; + + if (regexec(&cfg_line_regex, line, 3, reg_groups, 0) != 0) { + res = landerctl_cfg_err_not_found; + + break; + } + + // api_key is currently the only value we parse + int key_len = reg_groups[1].rm_eo - reg_groups[1].rm_so; + + if ((strlen("api_key") == key_len) && + (strncmp("api_key", &line[reg_groups[1].rm_so], key_len) == 0)) { + int val_len = reg_groups[2].rm_eo - reg_groups[2].rm_so; + char *buf = malloc(val_len + 1); + strncpy(buf, &line[reg_groups[2].rm_so], val_len); + + out->api_key = buf; + + res = landerctl_cfg_err_ok; + break; + } + } + + fclose(f); + + return res; +} diff --git a/landerctl/src/main.c b/landerctl/src/main.c new file mode 100644 index 0000000..3dd0dae --- /dev/null +++ b/landerctl/src/main.c @@ -0,0 +1,87 @@ +#include +#include +#include +#include + +#include +#include + +#include "landerctl.h" + +const char default_cfg_path[] = ".landerrc"; + +int main(int argc, char **argv) { + 
landerctl_cfg cfg; + char *err_msg = NULL; + + switch (landerctl_cfg_parse(&cfg, default_cfg_path)) { + case landerctl_cfg_err_ok: + break; + case landerctl_cfg_err_not_found: + err_msg = "Config file not found"; + break; + case landerctl_cfg_err_invalid: + err_msg = "Invalid config file"; + break; + case landerctl_cfg_err_incomplete: + err_msg = "Incomplete config file"; + break; + } + + if (err_msg != NULL) { + fprintf(stderr, "%s\n", err_msg); + exit(1); + } + + /* struct stat sb; */ + + /* stat(argv[1], &sb); */ + + /* printf("file size: %lu\n", sb.st_size); */ + + /* FILE *f = fopen(argv[1], "rb"); */ + + /* if (f == NULL) { */ + /* printf("Couldn't open file.\n"); */ + /* exit(1); */ + /* } */ + + /* curl_global_init(CURL_GLOBAL_ALL); */ + + /* CURL *curl = curl_easy_init(); */ + + /* if (curl == NULL) { */ + /* exit(1); */ + /* } */ + + /* curl_easy_setopt(curl, CURLOPT_URL, "http://localhost:18080/f/"); */ + /* curl_easy_setopt(curl, CURLOPT_UPLOAD, 1L); */ + /* curl_easy_setopt(curl, CURLOPT_READDATA, f); */ + + /* curl_off_t file_size = sb.st_size; */ + /* /1* curl_easy_setopt(curl, CURLOPT_INFILESIZE_LARGE, file_size); *1/ */ + /* curl_easy_setopt(curl, CURLOPT_POST, 1L); */ + /* curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE_LARGE, file_size); */ + + /* magic_t cookie = magic_open(MAGIC_MIME_TYPE); */ + /* magic_load(cookie, NULL); */ + /* const char *mime_type = magic_file(cookie, argv[1]); */ + + /* char content_type_header[strlen(mime_type) + 24]; */ + /* sprintf(content_type_header, "X-Lander-Content-Type: %s", mime_type); */ + + /* char content_length_header[32]; */ + /* sprintf(content_length_header, "Content-Length: %lu", sb.st_size); */ + + /* struct curl_slist *list = NULL; */ + /* list = curl_slist_append(list, content_type_header); */ + /* list = curl_slist_append(list, content_length_header); */ + /* list = curl_slist_append(list, "X-Api-Key: test"); */ + + /* curl_easy_setopt(curl, CURLOPT_HTTPHEADER, list); */ + + /* curl_easy_setopt(curl, 
CURLOPT_VERBOSE, 1); */ + /* curl_easy_perform(curl); */ + + /* curl_slist_free_all(list); */ +} From 810bfd2bc91ceed964fbe0f115289b2f047bace8 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Wed, 15 Nov 2023 14:50:02 +0100 Subject: [PATCH 68/70] feat(landerctl): support posting redirects --- landerctl/.landerrc | 1 + landerctl/include/landerctl.h | 19 +++++++ landerctl/src/cfg_parse.c | 46 ++++++++++++---- landerctl/src/main.c | 99 +++++++++++++++++++++++++++++++++-- landerctl/src/post.c | 60 +++++++++++++++++++++ 5 files changed, 212 insertions(+), 13 deletions(-) create mode 100644 landerctl/src/post.c diff --git a/landerctl/.landerrc b/landerctl/.landerrc index db525c7..964d3f2 100644 --- a/landerctl/.landerrc +++ b/landerctl/.landerrc @@ -1 +1,2 @@ api_key = test +server_url = http://localhost:18080 diff --git a/landerctl/include/landerctl.h b/landerctl/include/landerctl.h index 59a6301..7e01ed2 100644 --- a/landerctl/include/landerctl.h +++ b/landerctl/include/landerctl.h @@ -1,8 +1,13 @@ #ifndef LANDERCTL #define LANDERCTL +#include + +#include + typedef struct landerctl_cfg { const char *api_key; + const char *server_url; } landerctl_cfg; typedef enum landerctl_cfg_err { @@ -20,4 +25,18 @@ typedef enum landerctl_cfg_err { */ landerctl_cfg_err landerctl_cfg_parse(landerctl_cfg *out, const char *path); +typedef enum landerctl_mode { + landerctl_mode_none = 0, + landerctl_mode_short, + landerctl_mode_paste, + landerctl_mode_file, +} landerctl_mode; + +struct curl_slist *landerctl_set_common(const landerctl_cfg *cfg, CURL *curl, + landerctl_mode mode, bool secure, + const char *key); +void landerctl_post_short(CURL *curl, const char *url); +void landerctl_post_paste(CURL *curl, const char *path); +void landerctl_post_file(CURL *curl, const char *path); + #endif diff --git a/landerctl/src/cfg_parse.c b/landerctl/src/cfg_parse.c index 4a681dc..032a04b 100644 --- a/landerctl/src/cfg_parse.c +++ b/landerctl/src/cfg_parse.c @@ -14,12 +14,27 @@ landerctl_cfg_err 
landerctl_cfg_parse(landerctl_cfg *out, const char *path) { return landerctl_cfg_err_not_found; } + struct { + const char *key; + const char **var; + } key_to_vars[] = { + {"api_key", &out->api_key}, + {"server_url", &out->server_url}, + }; + size_t key_to_vars_len = sizeof(key_to_vars) / sizeof(key_to_vars[0]); + + // We NULL everything beforehand so we can check if we have all needed + // variables + for (size_t i = 0; i < key_to_vars_len; i++) { + *key_to_vars[i].var = NULL; + } + regex_t cfg_line_regex; regcomp(&cfg_line_regex, cfg_line_regex_expr, REG_EXTENDED); // Accept lines of at most 256 lines char line[256]; - landerctl_cfg_err res = landerctl_cfg_err_incomplete; + landerctl_cfg_err res = landerctl_cfg_err_ok; while (fgets(line, sizeof(line), f) != NULL) { // Last character might be a newline @@ -38,18 +53,29 @@ landerctl_cfg_err landerctl_cfg_parse(landerctl_cfg *out, const char *path) { } // api_key is currently the only value we parse - int key_len = reg_groups[1].rm_eo - reg_groups[1].rm_so; + size_t key_len = reg_groups[1].rm_eo - reg_groups[1].rm_so; - if ((strlen("api_key") == key_len) && - (strncmp("api_key", &line[reg_groups[1].rm_so], key_len) == 0)) { - int val_len = reg_groups[2].rm_eo - reg_groups[2].rm_so; - char *buf = malloc(val_len + 1); - strncpy(buf, &line[reg_groups[2].rm_so], val_len); + for (size_t i = 0; i < key_to_vars_len; i++) { + if ((key_len == strlen(key_to_vars[i].key)) && + (strncmp(&line[reg_groups[1].rm_so], key_to_vars[i].key, key_len) == + 0)) { + int val_len = reg_groups[2].rm_eo - reg_groups[2].rm_so; + char *buf = malloc(val_len + 1); + strncpy(buf, &line[reg_groups[2].rm_so], val_len); + buf[val_len] = '\0'; - out->api_key = buf; + *key_to_vars[i].var = buf; + break; + } + } + } - res = landerctl_cfg_err_ok; - break; + if (res == landerctl_cfg_err_ok) { + for (size_t i = 0; i < key_to_vars_len; i++) { + if (*key_to_vars[i].var == NULL) { + res = landerctl_cfg_err_incomplete; + break; + } } } diff --git 
a/landerctl/src/main.c b/landerctl/src/main.c index 3dd0dae..a88d0cc 100644 --- a/landerctl/src/main.c +++ b/landerctl/src/main.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -8,7 +9,8 @@ #include "landerctl.h" -const char default_cfg_path[] = ".landerrc"; +const char *default_cfg_path = ".landerrc"; +const char *usage = "%s [-SPFsv] arg [key]\n"; int main(int argc, char **argv) { landerctl_cfg cfg; @@ -33,6 +35,99 @@ int main(int argc, char **argv) { exit(1); } + opterr = 0; + + int c; + landerctl_mode mode = landerctl_mode_none; + bool secure = false; + bool verbose = false; + + while ((c = getopt(argc, argv, "SPFsv")) != -1) { + switch (c) { + case 'S': + mode = landerctl_mode_short; + break; + case 'P': + mode = landerctl_mode_paste; + break; + case 'F': + mode = landerctl_mode_file; + break; + case 's': + secure = true; + break; + case 'v': + verbose = true; + break; + case '?': + printf(usage, argv[0]); + exit(2); + } + } + + if (mode == landerctl_mode_none) { + printf("No mode specified.\n\n"); + printf(usage, argv[0]); + exit(2); + } + + if (optind == argc || (argc - optind > 2)) { + printf(usage, argv[0]); + exit(2); + } + + const char *arg = argv[optind]; + const char *key = argc - optind == 2 ? 
argv[optind + 1] : NULL; + + curl_global_init(CURL_GLOBAL_ALL); + CURL *curl = curl_easy_init(); + + if (curl == NULL) { + exit(255); + } + + struct curl_slist *list = landerctl_set_common(&cfg, curl, mode, secure, key); + + switch (mode) { + case landerctl_mode_short: + landerctl_post_short(curl, arg); + break; + } + + if (verbose) { + curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); + } + + int exit_code = 0; + + if (curl_easy_perform(curl) == CURLE_OK) { + long response_code; + curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code); + + if (response_code < 200 || response_code > 299) { + fprintf(stderr, "HTTP status code %li\n", response_code); + exit_code = 3; + } else { + struct curl_header *location_header; + + if (curl_easy_header(curl, "Location", 0, CURLH_HEADER, -1, + &location_header) == CURLHE_OK) { + printf("%s%s\n", cfg.server_url, location_header->value); + } else { + fprintf(stderr, "Server returned a 2xx without a Location header.\n"); + exit_code = 5; + } + } + } else { + fprintf(stderr, "Libcurl encountered an error.\n"); + exit_code = 4; + } + + curl_easy_cleanup(curl); + curl_slist_free_all(list); + + return exit_code; + /* struct stat sb; */ /* stat(argv[1], &sb); */ @@ -46,8 +141,6 @@ int main(int argc, char **argv) { /* exit(1); */ /* } */ - /* curl_global_init(CURL_GLOBAL_ALL); */ - /* CURL *curl = curl_easy_init(); */ /* if (curl == NULL) { */ diff --git a/landerctl/src/post.c b/landerctl/src/post.c new file mode 100644 index 0000000..c0dd653 --- /dev/null +++ b/landerctl/src/post.c @@ -0,0 +1,60 @@ +#include +#include + +#include "landerctl.h" + +struct curl_slist *landerctl_set_common(const landerctl_cfg *cfg, CURL *curl, + landerctl_mode mode, bool secure, + const char *key) { + size_t url_len = strlen(cfg->server_url) + 4; + + if (key != NULL) { + url_len += strlen(key); + } + + char mode_char; + + switch (mode) { + case landerctl_mode_short: + mode_char = 's'; + break; + case landerctl_mode_paste: + mode_char = 'p'; + break; + 
case landerctl_mode_file: + mode_char = 'f'; + break; + // Shouldn't be able to happen + default: + return NULL; + } + + char url[url_len + 1]; + + if (key == NULL) { + sprintf(url, "%s/%c%s/", cfg->server_url, mode_char, secure ? "l" : ""); + } else { + sprintf(url, "%s/%c%s/%s", cfg->server_url, mode_char, secure ? "l" : "", + key); + } + + curl_easy_setopt(curl, CURLOPT_URL, url); + + // Add API key header + char api_key_header[strlen(cfg->api_key) + 12]; + sprintf(api_key_header, "X-Api-Key: %s", cfg->api_key); + + struct curl_slist *list = NULL; + list = curl_slist_append(list, api_key_header); + + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, list); + + curl_easy_setopt(curl, CURLOPT_USERAGENT, "landerctl/" LANDER_VERSION ""); + + return list; +} + +void landerctl_post_short(CURL *curl, const char *url) { + curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, strlen(url)); + curl_easy_setopt(curl, CURLOPT_POSTFIELDS, url); +} From 92d6d8325664b61e7d4bd4b90c6f572d2522b357 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Thu, 16 Nov 2023 11:30:02 +0100 Subject: [PATCH 69/70] feat(landerctl): support all entry types --- CHANGELOG.md | 3 + landerctl/include/landerctl.h | 29 +++++++-- landerctl/src/main.c | 69 +++++++++++++-------- landerctl/src/post.c | 113 ++++++++++++++++++++++++++-------- 4 files changed, 156 insertions(+), 58 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3795d4a..9d590a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Add support for hosting arbitrary files * Content type of file is set if provided when uploading file * Support removing entries +* Landerctl + * Replaced old Bash script with Libcurl-based application + * Supporting posting redirects, pastes & arbitrarily large files ## [0.1.0](https://git.rustybever.be/Chewing_Bever/lander/src/tag/0.1.0) diff --git a/landerctl/include/landerctl.h b/landerctl/include/landerctl.h index 
7e01ed2..28a231a 100644 --- a/landerctl/include/landerctl.h +++ b/landerctl/include/landerctl.h @@ -32,11 +32,28 @@ typedef enum landerctl_mode { landerctl_mode_file, } landerctl_mode; -struct curl_slist *landerctl_set_common(const landerctl_cfg *cfg, CURL *curl, - landerctl_mode mode, bool secure, - const char *key); -void landerctl_post_short(CURL *curl, const char *url); -void landerctl_post_paste(CURL *curl, const char *path); -void landerctl_post_file(CURL *curl, const char *path); +typedef enum landerctl_err { + landerctl_err_ok = 0, + landerctl_err_not_found +} landerctl_err; + +typedef struct landerctl_ctx { + landerctl_cfg cfg; + landerctl_mode mode; + bool secure; + bool verbose; + const char *arg; + const char *key; + CURL *curl; + struct curl_slist *headers; + FILE *data_file; +} landerctl_ctx; + +const char *landerctl_err_msg(landerctl_err err); + +void landerctl_set_common(landerctl_ctx *ctx); +landerctl_err landerctl_post_short(landerctl_ctx *ctx); +landerctl_err landerctl_post_paste(landerctl_ctx *ctx); +landerctl_err landerctl_post_file(landerctl_ctx *ctx); #endif diff --git a/landerctl/src/main.c b/landerctl/src/main.c index a88d0cc..7805635 100644 --- a/landerctl/src/main.c +++ b/landerctl/src/main.c @@ -13,10 +13,10 @@ const char *default_cfg_path = ".landerrc"; const char *usage = "%s [-SPFsv] arg [key]\n"; int main(int argc, char **argv) { - landerctl_cfg cfg; + landerctl_ctx ctx = {0}; char *err_msg = NULL; - switch (landerctl_cfg_parse(&cfg, default_cfg_path)) { + switch (landerctl_cfg_parse(&ctx.cfg, default_cfg_path)) { case landerctl_cfg_err_ok: break; case landerctl_cfg_err_not_found: @@ -38,26 +38,23 @@ int main(int argc, char **argv) { opterr = 0; int c; - landerctl_mode mode = landerctl_mode_none; - bool secure = false; - bool verbose = false; while ((c = getopt(argc, argv, "SPFsv")) != -1) { switch (c) { case 'S': - mode = landerctl_mode_short; + ctx.mode = landerctl_mode_short; break; case 'P': - mode = landerctl_mode_paste; + 
ctx.mode = landerctl_mode_paste; break; case 'F': - mode = landerctl_mode_file; + ctx.mode = landerctl_mode_file; break; case 's': - secure = true; + ctx.secure = true; break; case 'v': - verbose = true; + ctx.verbose = true; break; case '?': printf(usage, argv[0]); @@ -65,7 +62,7 @@ int main(int argc, char **argv) { } } - if (mode == landerctl_mode_none) { + if (ctx.mode == landerctl_mode_none) { printf("No mode specified.\n\n"); printf(usage, argv[0]); exit(2); @@ -76,33 +73,49 @@ int main(int argc, char **argv) { exit(2); } - const char *arg = argv[optind]; - const char *key = argc - optind == 2 ? argv[optind + 1] : NULL; + ctx.arg = argv[optind]; + ctx.key = argc - optind == 2 ? argv[optind + 1] : NULL; curl_global_init(CURL_GLOBAL_ALL); - CURL *curl = curl_easy_init(); + ctx.curl = curl_easy_init(); - if (curl == NULL) { + if (ctx.curl == NULL) { exit(255); } - struct curl_slist *list = landerctl_set_common(&cfg, curl, mode, secure, key); + landerctl_set_common(&ctx); + landerctl_err res; - switch (mode) { + switch (ctx.mode) { case landerctl_mode_short: - landerctl_post_short(curl, arg); + res = landerctl_post_short(&ctx); break; + case landerctl_mode_paste: + res = landerctl_post_paste(&ctx); + break; + case landerctl_mode_file: + res = landerctl_post_file(&ctx); + break; + default: + return 7; } - if (verbose) { - curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); + if (res != landerctl_err_ok) { + printf("%s\n", landerctl_err_msg(res)); + exit(6); } + if (ctx.verbose) { + curl_easy_setopt(ctx.curl, CURLOPT_VERBOSE, 1L); + } + + curl_easy_setopt(ctx.curl, CURLOPT_HTTPHEADER, ctx.headers); + int exit_code = 0; - if (curl_easy_perform(curl) == CURLE_OK) { + if (curl_easy_perform(ctx.curl) == CURLE_OK) { long response_code; - curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code); + curl_easy_getinfo(ctx.curl, CURLINFO_RESPONSE_CODE, &response_code); if (response_code < 200 || response_code > 299) { fprintf(stderr, "HTTP status code %li\n", response_code); 
@@ -110,9 +123,9 @@ int main(int argc, char **argv) { } else { struct curl_header *location_header; - if (curl_easy_header(curl, "Location", 0, CURLH_HEADER, -1, + if (curl_easy_header(ctx.curl, "Location", 0, CURLH_HEADER, -1, &location_header) == CURLHE_OK) { - printf("%s%s\n", cfg.server_url, location_header->value); + printf("%s%s\n", ctx.cfg.server_url, location_header->value); } else { fprintf(stderr, "Server returned a 2xx without a Location header.\n"); exit_code = 5; @@ -123,8 +136,12 @@ int main(int argc, char **argv) { exit_code = 4; } - curl_easy_cleanup(curl); - curl_slist_free_all(list); + curl_easy_cleanup(ctx.curl); + curl_slist_free_all(ctx.headers); + + if (ctx.data_file != NULL) { + fclose(ctx.data_file); + } return exit_code; diff --git a/landerctl/src/post.c b/landerctl/src/post.c index c0dd653..76e5b04 100644 --- a/landerctl/src/post.c +++ b/landerctl/src/post.c @@ -1,20 +1,31 @@ -#include +#include #include +#include + +#include +#include #include "landerctl.h" -struct curl_slist *landerctl_set_common(const landerctl_cfg *cfg, CURL *curl, - landerctl_mode mode, bool secure, - const char *key) { - size_t url_len = strlen(cfg->server_url) + 4; +const char *landerctl_err_msg(landerctl_err err) { + switch (err) { + case landerctl_err_not_found: + return "File not found"; + default: + return ""; + } +} - if (key != NULL) { - url_len += strlen(key); +void landerctl_set_common(landerctl_ctx *ctx) { + size_t url_len = strlen(ctx->cfg.server_url) + 4; + + if (ctx->key != NULL) { + url_len += strlen(ctx->key); } char mode_char; - switch (mode) { + switch (ctx->mode) { case landerctl_mode_short: mode_char = 's'; break; @@ -26,35 +37,85 @@ struct curl_slist *landerctl_set_common(const landerctl_cfg *cfg, CURL *curl, break; // Shouldn't be able to happen default: - return NULL; + return; } char url[url_len + 1]; - if (key == NULL) { - sprintf(url, "%s/%c%s/", cfg->server_url, mode_char, secure ? 
"l" : ""); + if (ctx->key == NULL) { + sprintf(url, "%s/%c%s/", ctx->cfg.server_url, mode_char, + ctx->secure ? "l" : ""); } else { - sprintf(url, "%s/%c%s/%s", cfg->server_url, mode_char, secure ? "l" : "", - key); + sprintf(url, "%s/%c%s/%s", ctx->cfg.server_url, mode_char, + ctx->secure ? "l" : "", ctx->key); } - curl_easy_setopt(curl, CURLOPT_URL, url); + curl_easy_setopt(ctx->curl, CURLOPT_URL, url); // Add API key header - char api_key_header[strlen(cfg->api_key) + 12]; - sprintf(api_key_header, "X-Api-Key: %s", cfg->api_key); + char api_key_header[strlen(ctx->cfg.api_key) + 12]; + sprintf(api_key_header, "X-Api-Key: %s", ctx->cfg.api_key); - struct curl_slist *list = NULL; - list = curl_slist_append(list, api_key_header); + ctx->headers = curl_slist_append(NULL, api_key_header); - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, list); - - curl_easy_setopt(curl, CURLOPT_USERAGENT, "landerctl/" LANDER_VERSION ""); - - return list; + curl_easy_setopt(ctx->curl, CURLOPT_USERAGENT, + "landerctl/" LANDER_VERSION ""); } -void landerctl_post_short(CURL *curl, const char *url) { - curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, strlen(url)); - curl_easy_setopt(curl, CURLOPT_POSTFIELDS, url); +landerctl_err landerctl_post_short(landerctl_ctx *ctx) { + curl_easy_setopt(ctx->curl, CURLOPT_POSTFIELDSIZE, strlen(ctx->arg)); + curl_easy_setopt(ctx->curl, CURLOPT_POSTFIELDS, ctx->arg); + + return landerctl_err_ok; +} + +landerctl_err landerctl_post_paste(landerctl_ctx *ctx) { + ctx->data_file = fopen(ctx->arg, "rb"); + + if (ctx->data_file == NULL) { + return landerctl_err_not_found; + } + + struct stat sb; + stat(ctx->arg, &sb); + + curl_easy_setopt(ctx->curl, CURLOPT_POST, 1L); + curl_easy_setopt(ctx->curl, CURLOPT_READDATA, ctx->data_file); + curl_easy_setopt(ctx->curl, CURLOPT_POSTFIELDSIZE, sb.st_size); + + return landerctl_err_ok; +} + +landerctl_err landerctl_post_file(landerctl_ctx *ctx) { + ctx->data_file = fopen(ctx->arg, "rb"); + + if (ctx->data_file == NULL) { + return 
landerctl_err_not_found; + } + + struct stat sb; + stat(ctx->arg, &sb); + + curl_easy_setopt(ctx->curl, CURLOPT_POST, 1L); + curl_easy_setopt(ctx->curl, CURLOPT_READDATA, ctx->data_file); + curl_easy_setopt(ctx->curl, CURLOPT_POSTFIELDSIZE_LARGE, sb.st_size); + + magic_t cookie = magic_open(MAGIC_MIME_TYPE); + magic_load(cookie, NULL); + const char *mime_type = magic_file(cookie, ctx->arg); + + char content_type_header[strlen(mime_type) + 24]; + sprintf(content_type_header, "X-Lander-Content-Type: %s", mime_type); + + char s[strlen(ctx->arg) + 1]; + strcpy(s, ctx->arg); + const char *base_name = basename(s); + + char filename_header[strlen(base_name) + 20]; + sprintf(filename_header, "X-Lander-Filename: %s", base_name); + + ctx->headers = curl_slist_append(ctx->headers, content_type_header); + ctx->headers = curl_slist_append(ctx->headers, filename_header); + + return landerctl_err_ok; } From 59da997828d4f2307a4a832f19a614d5cfb2c0ad Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Thu, 16 Nov 2023 11:30:56 +0100 Subject: [PATCH 70/70] chore(ci): once again remove deploy hook --- .woodpecker/docker.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.woodpecker/docker.yml b/.woodpecker/docker.yml index 9883282..3085f23 100644 --- a/.woodpecker/docker.yml +++ b/.woodpecker/docker.yml @@ -15,12 +15,3 @@ pipeline: - 'docker_password' when: event: push - - deploy: - image: 'curlimages/curl' - secrets: - - 'webhook_url' - commands: - - 'curl -XPOST -s --fail $WEBHOOK_URL' - when: - event: push