diff --git a/.dockerignore b/.dockerignore index 5f57aa4..9bda86a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,15 @@ * !src/ -!Makefile -!thirdparty/ !include/ + +!lsm/src/ +!lsm/include/ +!lsm/Makefile +!lsm/config.mk + +!thirdparty/include +!thirdparty/src + +!Makefile +!config.mk diff --git a/.gitignore b/.gitignore index 35f41da..6f9958c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ lander.data* pastes/ .cache/ vgcore.* +data/ diff --git a/.woodpecker.yml b/.woodpecker.yml deleted file mode 100644 index 83600f1..0000000 --- a/.woodpecker.yml +++ /dev/null @@ -1,22 +0,0 @@ -platform: 'linux/amd64' -branches: 'main' - -pipeline: - docker: - image: 'plugins/docker' - settings: - registry: 'git.rustybever.be' - repo: 'git.rustybever.be/chewing_bever/lander' - tag: - - 'latest' - mtu: 1300 - secrets: - - 'docker_username' - - 'docker_password' - - deploy: - image: 'curlimages/curl' - secrets: - - 'webhook_url' - commands: - - 'curl -XPOST -s --fail $WEBHOOK_URL' diff --git a/.woodpecker/build.yml b/.woodpecker/build.yml new file mode 100644 index 0000000..5cdc7af --- /dev/null +++ b/.woodpecker/build.yml @@ -0,0 +1,62 @@ +matrix: + PLATFORM: + - 'linux/amd64' + +platform: ${PLATFORM} + +pipeline: + build-lander: + image: 'alpine:edge' + commands: + - apk add --no-cache build-base make + - make CFLAGS='-O3' LDFLAGS='-flto -static' + - strip -s build/lander + - du -h build/lander + - '[ "$(readelf -d build/lander | grep NEEDED | wc -l)" = 0 ]' + + build-landerctl: + image: 'alpine:3.18.0' + commands: + - > + apk add --no-cache + build-base git make curl-static curl-dev file-dev libmagic-static + openssl-libs-static openssl-dev libidn2-static libidn2-dev + nghttp2-static nghttp2-dev libunistring-static libunistring-dev + brotli-static brotli-dev zlib-static zlib-dev + - make -C landerctl CFLAGS='-O3' LDFLAGS='-flto -static' LIBS='magic curl ssl nghttp2 crypto idn2 unistring brotlidec brotlienc brotlicommon z' + - strip -s 
landerctl/build/landerctl + - du -h landerctl/build/landerctl + - '[ "$(readelf -d landerctl/build/landerctl | grep NEEDED | wc -l)" = 0 ]' + + publish-dev: + image: 'alpine:3.18.0' + group: publish + commands: + - apk add --no-cache minio-client + - mcli alias set rb 'https://s3.rustybever.be' "$MINIO_ACCESS_KEY" "$MINIO_SECRET_KEY" + - mcli cp build/lander landerctl/build/landerctl "rb/lander/commits/$CI_COMMIT_SHA/" + secrets: + - minio_access_key + - minio_secret_key + when: + branch: + exclude: [ release/* ] + + publish-rel: + image: 'alpine:3.18.0' + group: publish + commands: + - > + curl -s --fail + --user "Chewing_Bever:$GITEA_PASSWORD" + --upload-file build/lander + https://git.rustybever.be/api/packages/Chewing_Bever/generic/lander/"${CI_COMMIT_TAG}"/lander-"$(echo '${PLATFORM}' | sed 's:/:-:g')" + - > + curl -s --fail + --user "Chewing_Bever:$GITEA_PASSWORD" + --upload-file landerctl/build/landerctl + https://git.rustybever.be/api/packages/Chewing_Bever/generic/lander/"${CI_COMMIT_TAG}"/landerctl-"$(echo '${PLATFORM}' | sed 's:/:-:g')" + secrets: + - gitea_password + when: + event: tag diff --git a/.woodpecker/docker.yml b/.woodpecker/docker.yml index 9883282..3085f23 100644 --- a/.woodpecker/docker.yml +++ b/.woodpecker/docker.yml @@ -15,12 +15,3 @@ pipeline: - 'docker_password' when: event: push - - deploy: - image: 'curlimages/curl' - secrets: - - 'webhook_url' - commands: - - 'curl -XPOST -s --fail $WEBHOOK_URL' - when: - event: push diff --git a/.woodpecker/rel.yml b/.woodpecker/rel.yml deleted file mode 100644 index 28b882d..0000000 --- a/.woodpecker/rel.yml +++ /dev/null @@ -1,31 +0,0 @@ -matrix: - PLATFORM: - - 'linux/amd64' - - 'linux/arm64' - -platform: ${PLATFORM} -branches: [ main ] - -pipeline: - build: - image: 'alpine:3.18.0' - commands: - - apk add --update --no-cache build-base make - - make CFLAGS='-O3 -static' LDFLAGS='-flto' - - strip build/lander - - '[ "$(readelf -d build/lander | grep NEEDED | wc -l)" = 0 ]' - when: - event: tag - 
- publish: - image: 'curlimages/curl' - secrets: - - gitea_password - commands: - - > - curl -s --fail - --user "Chewing_Bever:$GITEA_PASSWORD" - --upload-file build/lander - https://git.rustybever.be/api/packages/Chewing_Bever/generic/lander/"${CI_COMMIT_TAG}"/lander-"$(echo '${PLATFORM}' | sed 's:/:-:g')" - when: - event: tag diff --git a/CHANGELOG.md b/CHANGELOG.md index 0709613..82b5c0c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased](https://git.rustybever.be/Chewing_Bever/lander/src/branch/dev) +## [0.2.0](https://git.rustybever.be/Chewing_Bever/lander/src/tag/0.2.0) + +### Added + +* HTTP Loop + * Fully decoupled functionality from Lander-specific code + * Users can now define custom global & request-local contexts + * Introduced "response steps", allowing custom code during the response + part of a request +* LSM - Lander Storage Module + * Rewrite of trie codebase + * Introduced a custom data store using an in-memory trie as index and a + custom binary on-disk format + * Support for lookup, insert & a basic remove +* Lander + * Replaced old trie implementation with LSM store + * Add support for hosting arbitrary files + * Content type of file is set if provided when uploading file + * Support removing entries +* Landerctl + * Replaced old Bash script with Libcurl-based application + * Support for posting redirects, pastes & arbitrarily large files + ## [0.1.0](https://git.rustybever.be/Chewing_Bever/lander/src/tag/0.1.0) ### Added diff --git a/Dockerfile b/Dockerfile index 7fc77f2..dd4eb42 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,12 @@ -FROM alpine:3.18.0 AS builder +FROM ubuntu:23.10 AS builder ARG DI_VER=1.2.5 -RUN apk add --update --no-cache \ - build-base \ - make \ - curl +RUN apt update && \ + apt install -y --no-install-recommends \ + curl ca-certificates \ + build-essential \ + musl musl-dev musl-tools WORKDIR /app @@ -18,8 +19,9 
@@ RUN curl -Lo - "https://github.com/Yelp/dumb-init/archive/refs/tags/v${DI_VER}.t COPY . ./ -RUN make CFLAGS='-O3 -static' LDFLAGS='-flto' && \ +RUN make CFLAGS='-O3' LDFLAGS='-flto -static' && \ strip build/lander && \ + readelf -d build/lander && \ [ "$(readelf -d build/lander | grep NEEDED | wc -l)" = 0 ] diff --git a/Makefile b/Makefile index 06ab346..f2c448c 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,10 @@ # https://spin.atomicobject.com/2016/08/26/makefile-c-projects/ was a great # base for this Makefile -VERSION := 0.1.0 +-include config.mk -BIN_FILENAME ?= lander - -BUILD_DIR ?= build -SRC_DIR ?= src -TEST_DIR ?= test -THIRDPARTY_DIR ?= thirdparty -INC_DIRS ?= include $(THIRDPARTY_DIR)/include +export CFLAGS +export LDFLAGS BIN := $(BUILD_DIR)/$(BIN_FILENAME) @@ -28,18 +23,11 @@ BINS_TEST := $(OBJS_TEST:%.c.o=%) TARGETS_TEST := $(BINS_TEST:%=test-%) TARGETS_MEM_TEST := $(BINS_TEST:%=test-mem-%) -INC_FLAGS := $(addprefix -I,$(INC_DIRS)) - -# -MMD: generate a .d file for every source file. This file can be imported by -# make and makes make aware that a header file has been changed, ensuring an -# object file is also recompiled if only a header is changed. 
-# -MP: generate a dummy target for every header file (according to the docs it -# prevents some errors when removing header files) -CFLAGS ?= -MMD -MP -g -INTERNALCFLAGS := $(INC_FLAGS) $(CFLAGS) -Wall -Wextra -DLANDER_VERSION=\"$(VERSION)\" +_CFLAGS := $(addprefix -I,$(INC_DIRS)) $(CFLAGS) -Wall -Wextra -DLANDER_VERSION=\"$(VERSION)\" +_LDFLAGS := $(addprefix -L,$(LIB_DIRS)) $(addprefix -l,$(LIBS)) $(LDFLAGS) .PHONY: all -all: bin +all: $(BIN) # =====COMPILATION===== @@ -47,27 +35,41 @@ all: bin .PHONY: objs objs: $(OBJS) -.PHONY: bin -bin: $(BIN) -$(BIN): $(OBJS) - $(CC) $(INTERNALCFLAGS) $(LDFLAGS) -lm -o $@ $^ +.PHONY: liblsm +liblsm: + $(MAKE) -C lsm + +$(BIN): liblsm $(OBJS) + $(CC) -o $@ $(OBJS) $(_LDFLAGS) $(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c mkdir -p $(dir $@) - $(CC) $(INTERNALCFLAGS) -c $< -o $@ + $(CC) $(_CFLAGS) -c $< -o $@ $(BUILD_DIR)/$(THIRDPARTY_DIR)/%.c.o: $(THIRDPARTY_DIR)/%.c mkdir -p $(dir $@) - $(CC) $(INTERNALCFLAGS) -c $< -o $@ + $(CC) $(_CFLAGS) -c $< -o $@ +.PHONY: bin-docker +bin-docker: + docker build -t lander . + docker container create --name lander-temp lander + docker cp -q lander-temp:/bin/lander $(BUILD_DIR)/lander-docker + docker container rm lander-temp # =====TESTING===== .PHONY: run -run: bin +run: $(BIN) LANDER_API_KEY=test \ LANDER_DATA_DIR=data \ '$(BUILD_DIR)/$(BIN_FILENAME)' +.PHONY: valgrind +valgrind: $(BIN) + LANDER_API_KEY=test \ + LANDER_DATA_DIR=data \ + valgrind '$(BUILD_DIR)/$(BIN_FILENAME)' + .PHONY: test test: $(TARGETS_TEST) @@ -95,23 +97,45 @@ $(BINS_TEST): %: %.c.o # exposed. 
$(BUILD_DIR)/$(TEST_DIR)/%.c.o: $(TEST_DIR)/%.c mkdir -p $(dir $@) - $(CC) $(INTERNALCFLAGS) -I$(TEST_DIR) \ + $(CC) $(_CFLAGS) -I$(TEST_DIR) \ -I$(dir $(@:$(BUILD_DIR)/$(TEST_DIR)/%=$(SRC_DIR)/%)) \ -c $< -o $@ + # =====MAINTENANCE===== .PHONY: lint lint: clang-format -n --Werror $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) + make -C lsm lint + make -C landerctl lint .PHONY: fmt fmt: clang-format -i $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) + make -C lsm fmt + make -C landerctl fmt + +.PHONY: check +check: + mkdir -p $(BUILD_DIR)/cppcheck + cppcheck \ + $(addprefix -I,$(INC_DIRS)) \ + --cppcheck-build-dir=$(BUILD_DIR)/cppcheck \ + --error-exitcode=1 \ + --enable=warning,style \ + --inline-suppr \ + --check-level=exhaustive \ + --quiet \ + -j$(shell nproc) \ + $(SRCS) + make -C lsm check + make -C landerctl check .PHONY: clean clean: rm -rf $(BUILD_DIR) - + $(MAKE) -C lsm clean + $(MAKE) -C landerctl clean .PHONY: bear bear: clean diff --git a/README.md b/README.md index fcdf32f..4f8d21e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,16 @@ # Lander +Lander is an HTTP/1.1 server that acts as a URL shortener, pastebin and +file-sharing service. It's written from the ground up in C, complete with an +HTTP framework built on top of an event loop implementation based on [Build +Your Own Redis with C/C++](https://build-your-own.org/redis/). Lookup of +entries is done using an in-memory trie data structure, and on-disk storage +uses a custom binary database format. + +The codebase uses one thirdparty library, namely +[picohttpparser](https://github.com/h2o/picohttpparser) for parsing HTTP +requests. + ## The idea A URL shortener has always been on my list of things I'd like to write myself. @@ -10,12 +21,6 @@ different tries (Patricia trie, ternary trie, and a custom one). Considering these are efficient string-based search trees, this gave me the idea to use it as the backend for a URL shortener! -This implementation currently uses a ternary trie as its search tree. 
The -persistence model is very simple; I simply append a line to a text file every -time a URL is added, and add the lines of this file to the trie on startup. The -trie is stored completely im memory, and no I/O operations are required when -requesting a redirect. This makes the server very fast. - ## The name I gave up giving my projects original names a long time ago, so now I just use diff --git a/TRIE.md b/TRIE.md deleted file mode 100644 index a9f0802..0000000 --- a/TRIE.md +++ /dev/null @@ -1,16 +0,0 @@ -# Trie design - -The underlying data structure is based on a combination of a ternary and a -Patricia trie. - -* Nodes are classic ternary trie nodes, meaning each node contains a binary - search tree -* Each node can define a skip, like a Patricia trie, of at most 8 characters. - These skipped characters are stored directly in the structs defining the - nodes. -* While the add function relies on the fact that the input is a NULL-terminated - C string, the trie itself does not store any NULL bytes. - -The goal of this datastructure is to be as optimized as possible for search -operations with short (usually < 8 characters) keys, as this is by far the most -common operation for a URL shortener/pastebin. diff --git a/config.mk b/config.mk new file mode 100644 index 0000000..da3e0b8 --- /dev/null +++ b/config.mk @@ -0,0 +1,23 @@ +VERSION := 0.2.0 + +BIN_FILENAME = lander + +BUILD_DIR = build +SRC_DIR = src +TEST_DIR = test +THIRDPARTY_DIR = thirdparty + +INC_DIRS = include $(THIRDPARTY_DIR)/include lsm/include +LIBS = m lsm +LIB_DIRS = ./lsm/build + +# -MMD: generate a .d file for every source file. This file can be imported by +# make and makes make aware that a header file has been changed, ensuring an +# object file is also recompiled if only a header is changed. 
+# -MP: generate a dummy target for every header file (according to the docs it +# prevents some errors when removing header files) +CFLAGS ?= -MMD -MP -g + +# When compiling release builds, these flags are better +# CFLAGS = -O3 +# LDFLAGS = -flto diff --git a/include/http/types.h b/include/http/types.h index 5b75bea..cccf0a0 100644 --- a/include/http/types.h +++ b/include/http/types.h @@ -124,7 +124,8 @@ extern const char *http_header_names[]; typedef enum http_header { http_header_connection = 0, http_header_location, - http_header_content_type + http_header_content_type, + http_header_content_disposition } http_header; typedef enum http_body_type { diff --git a/include/http_loop.h b/include/http_loop.h index b207d51..131bd6e 100644 --- a/include/http_loop.h +++ b/include/http_loop.h @@ -7,7 +7,6 @@ #include "http/req.h" #include "http/res.h" #include "http/types.h" -#include "trie.h" // Max amount of steps a route can use #define HTTP_LOOP_MAX_STEPS 17 @@ -27,11 +26,14 @@ typedef enum http_route_type { * Function describing a step in a route's processing. * * @param conn connection to process - * @return whether the processing can immediately advance to the next step. A - * step should return false if it's e.g. waiting for I/O, and can therefore not - * finish its task in the current cycle of the event loop. + * @return whether processing can proceed to the next step without performing + * I/O first. For a request step, `false` means more data needs to be read + * before the step can finish its processing. For response steps, `false` means + * there's new data in the write buffer that needs to be written. */ -typedef bool (*step)(event_loop_conn *conn); +typedef bool (*http_step)(event_loop_conn *conn); + +extern const http_step http_default_res_steps[HTTP_LOOP_MAX_STEPS]; /** * Struct describing a route a request can take. @@ -43,7 +45,8 @@ typedef struct http_route { // Compiled regex for a regex route. 
This value gets set at runtime when // starting the http loop regex_t *regex; - step steps[HTTP_LOOP_MAX_STEPS]; + const http_step steps[HTTP_LOOP_MAX_STEPS]; + const http_step steps_res[HTTP_LOOP_MAX_STEPS]; } http_route; /** @@ -52,9 +55,12 @@ typedef struct http_route { typedef struct http_loop_gctx { http_route *routes; size_t route_count; - Trie *trie; + void *(*custom_ctx_init)(); + void (*custom_ctx_reset)(void *); + void (*custom_ctx_free)(void *); const char *api_key; - const char *data_dir; + // Custom global context + void *c; } http_loop_gctx; /** @@ -73,6 +79,7 @@ typedef struct http_loop_ctx { http_route *route; size_t current_step; http_loop_gctx *g; + void *c; } http_loop_ctx; /** @@ -98,6 +105,11 @@ void http_loop_ctx_reset(http_loop_ctx *ctx); */ void http_loop_ctx_free(http_loop_ctx *ctx); +/** + * Represents an HTTP loop + */ +typedef struct event_loop http_loop; + /** * Process incoming data as an HTTP request. This is the "handle_data" function * for the event loop. @@ -107,14 +119,6 @@ void http_loop_ctx_free(http_loop_ctx *ctx); */ bool http_loop_handle_request(event_loop_conn *conn); -/** - * Write the HTTP response to the file descriptor. This is the "write_data" - * function for the event loop. - * - * @param conn connection to process - */ -void http_loop_write_response(event_loop_conn *conn); - /** * Try to parse the incoming data as an HTTP request. * @@ -139,6 +143,14 @@ void http_loop_route_request(event_loop_conn *conn); */ void http_loop_process_request(event_loop_conn *conn); +/** + * Handles the response processing. This is the `write_data` function for the + * event loop. + * + * @param conn connection to process + */ +void http_loop_handle_response(event_loop_conn *conn); + /** * Request step that consumes the request body and stores it in a buffer. 
* @@ -155,6 +167,13 @@ bool http_loop_step_body_to_buf(event_loop_conn *conn); */ bool http_loop_step_body_to_file(event_loop_conn *conn); +/** + * Try to parse the Content-Length header. + * + * @param conn connection to process + */ +bool http_loop_step_parse_content_length(event_loop_conn *conn); + /** * Authenticate the request using the X-Api-Key header. * @@ -171,13 +190,45 @@ bool http_loop_step_auth(event_loop_conn *conn); */ bool http_loop_step_switch_res(event_loop_conn *conn); +/** + * Write the HTTP header back to the connection. If `res->head` is not set, a + * header will be generated for you. + * + * @param conn connection to process + */ +bool http_loop_step_write_header(event_loop_conn *conn); + +/** + * Write the HTTP body back to the connection. + * + * @param conn connection to process + */ +bool http_loop_step_write_body(event_loop_conn *conn); + /** * Initialize a new http loop. * - * @param gctx global context for the event loop + * @param routes array of routes that should be served + * @param route_count how many elements are in `routes` + * @param custom_gctx the application's custom global context; can be NULL + * @param custom_ctx_init function to initialize a new custom context + * @param custom_ctx_reset function to reset a custom context + * @param custom_ctx_free function to free a custom context; will always be run + * after a reset * @return pointer to the newly allocated object */ -event_loop *http_loop_init(http_loop_gctx *gctx); +http_loop *http_loop_init(http_route *routes, size_t route_count, + void *custom_gctx, void *(*custom_ctx_init)(), + void(custom_ctx_reset)(void *), + void(custom_ctx_free)(void *)); + +/** + * Set the API key the authentication steps should use. + * + * @param hl HTTP loop to set key in + * @param api_key API key to use + */ +void http_loop_set_api_key(http_loop *hl, const char *api_key); /** * Run the HTTP loop. This function never returns. 
@@ -185,6 +236,6 @@ event_loop *http_loop_init(http_loop_gctx *gctx); * @param el the event loop * @param port on what port to listen */ -void http_loop_run(event_loop *el, int port); +void http_loop_run(http_loop *hl, int port); #endif diff --git a/include/lander.h b/include/lander.h index daf6fe7..a30c32d 100644 --- a/include/lander.h +++ b/include/lander.h @@ -2,8 +2,40 @@ #define LANDER #include "http_loop.h" +#include "lsm/store.h" -extern http_route lander_routes[4]; +extern http_route lander_routes[6]; +extern const char lander_key_charset[]; + +typedef struct lander_gctx { + const char *data_dir; + lsm_store *store; +} lander_gctx; + +typedef struct lander_ctx { + lsm_entry_handle *entry; +} lander_ctx; + +typedef enum lander_attr_type : uint8_t { + lander_attr_type_entry_type = 0, + lander_attr_type_content_type = 1, + lander_attr_type_url = 2, + lander_attr_type_file_name = 3, +} lander_attr_type; + +typedef enum lander_entry_type : uint8_t { + lander_entry_type_redirect = 0, + lander_entry_type_paste = 1, + lander_entry_type_file = 2, +} lander_entry_type; + +void *lander_gctx_init(); + +void *lander_ctx_init(); + +void lander_ctx_reset(lander_ctx *ctx); + +void lander_ctx_free(lander_ctx *ctx); bool lander_get_index(event_loop_conn *conn); @@ -13,4 +45,30 @@ bool lander_post_redirect(event_loop_conn *conn); bool lander_post_paste(event_loop_conn *conn); +bool lander_post_paste(event_loop_conn *conn); + +bool lander_post_redirect(event_loop_conn *conn); + +bool lander_stream_body_to_entry(event_loop_conn *conn); + +bool lander_stream_body_to_client(event_loop_conn *conn); + +bool lander_post_redirect_body_to_attr(event_loop_conn *conn); + +bool lander_remove_entry(event_loop_conn *conn); + +bool lander_post_file(event_loop_conn *conn); + +/** + * Store the requested header as an attribute, if it's present. 
+ */ +void lander_header_to_attr(http_loop_ctx *ctx, const char *header, + lander_attr_type attr_type); + +/** + * Store the attribute's value as the provided header, if present. + */ +void lander_attr_to_header(http_loop_ctx *ctx, lander_attr_type attr_type, + http_header header_type); + #endif diff --git a/include/trie.h b/include/trie.h deleted file mode 100644 index 9662ed4..0000000 --- a/include/trie.h +++ /dev/null @@ -1,148 +0,0 @@ -#ifndef AD3_TERNARYTRIE -#define AD3_TERNARYTRIE - -#define ALPHABET_SIZE 256 -#define DELIMITER '\0' -#define MAX(x, y) (((x) > (y)) ? (x) : (y)) - -// Should not be higher than 254 or stuff will break -#define TRIE_MAX_SKIP_SIZE 8 - -/** - * The implementation of a Ternary Trie. - * - * Each node should be represented by a binary tree in order to reduce the - * memory usage. - */ - -#include -#include -#include - -static const char charset[] = - "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; -static const size_t charset_len = sizeof(charset) - 1; - -// Length of randomly generated keys -#define RANDOM_KEY_LENGTH_SHORT 4 -#define RANDOM_KEY_LENGTH_LONG 16 - -/** - * Type definition for the struct representing the current Trie. - * - * You can (and should) redefine this in your c-file with the concrete fields. - */ -typedef struct ttrie Trie; - -typedef enum entry_type { Redirect, Paste, Unknown } EntryType; - -typedef struct entry { - EntryType type; - char *string; -} Entry; - -typedef enum trie_exit_code { - Ok = 0, - NotFound, - AlreadyPresent, - FileError -} TrieExitCode; - -Entry *entry_new(EntryType type, const char *string); - -/** - * Allocate & initialize a new trie, and populate it with the data from the - * given data file. - * - * @return 0 if everything was successful, non-zero otherwise - */ -TrieExitCode trie_init(Trie **trie_ptr, const char *file_path); - -/** - * De-allocate a trie by freeing the memory occupied by this trie. 
- * - * @param trie which should be freed - */ -void trie_free(Trie *trie); - -/** - * Search for an entry in the trie. - * - * @param trie - * @param entry_ptr pointer to Entry will be stored here, if found - * @param key key representing the entry - * @return 0 if the search was successful, 1 if not found - */ -TrieExitCode trie_search(Trie *trie, Entry **entry_ptr, const char *key); - -TrieExitCode trie_search_len(Trie *trie, Entry **entry_ptr, const char *key, - size_t key_len); - -/** - * Add a string to this trie. - * - * @param trie - * @param key key to represent entry with - * @param entry entry to add - * @return 0 if added, 1 if already in trie, something else if other errors - */ -TrieExitCode trie_add(Trie *trie, const char *key, Entry *entry); - -TrieExitCode trie_add_len(Trie *trie, const char *key, size_t key_len, - Entry *entry); - -/** - * Add an entry by generating a random string as the key. - * - * @param trie - * @param entry entry to add - * @param secure whether to generate a longer, more secure random key - * @return pointer to the generated key. This pointer is safe to use after - * unlocking the trie, and should be freed manually. - */ -TrieExitCode trie_add_random(Trie *trie, char **key_ptr, Entry *entry, - bool secure); - -/** - * Remove an entry from this trie given its key. - * - * @param trie - * @param key key representing entry - * @return true if the entry was present and has been removed, false if it was - * not present - */ -bool trie_remove(Trie *trie, const char *key); - -/** - * Returns the number of entries in this trie. - * - * @param trie - * @return the number of entries in this trie - */ -size_t trie_size(Trie *trie); - -/* - * Acquire a read lock on the trie. - * - * @return 0 if successful, non-zero otherwise (return value of - * pthread_rwlock_rdlock) - */ -int trie_rlock(Trie *trie); - -/* - * Acquire a write lock on the trie. 
- * - * @return 0 if successful, non-zero otherwise (return value of - * pthread_rwlock_wrlock) - */ -int trie_wlock(Trie *trie); - -/* - * Release the lock on a trie after having acquired it beforehand. - * - * @return 0 if successful, non-zero otherwise (return value of - * pthread_rwlock_unlock) - */ -int trie_unlock(Trie *trie); - -#endif // AD3_TERNARYTRIE diff --git a/landerctl b/landerctl deleted file mode 100755 index c0433a5..0000000 --- a/landerctl +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env sh - -API_KEY=test -URL=http://localhost:18080 - -if [ "$1" = g ]; then - curl -is "$URL/$2" | - sed -En 's/^[lL]ocation: (.*)/\1/p' - -elif [ "$1" = s ]; then - curl \ - -w "${URL}%header{location}" \ - -XPOST \ - -d "$2" \ - -H "X-Api-Key: $API_KEY" \ - "$URL/s/$3" - -elif [ "$1" = sl ]; then - curl \ - -w "${URL}%header{location}" \ - -XPOST \ - -d "$2" \ - -H "X-Api-Key: $API_KEY" \ - "$URL/sl/$3" - -elif [ "$1" = p ]; then - curl \ - -w "${URL}%header{location}" \ - -XPOST \ - -H "X-Api-Key: $API_KEY" \ - --data-binary @"$2" \ - "$URL/p/$3" - -elif [ "$1" = pl ]; then - curl \ - -w "${URL}%header{location}" \ - -XPOST \ - -H "X-Api-Key: $API_KEY" \ - --data-binary @"$2" \ - "$URL/pl/$3" -fi diff --git a/landerctl/.landerrc b/landerctl/.landerrc new file mode 100644 index 0000000..964d3f2 --- /dev/null +++ b/landerctl/.landerrc @@ -0,0 +1,2 @@ +api_key = test +server_url = http://localhost:18080 diff --git a/landerctl/Makefile b/landerctl/Makefile new file mode 100644 index 0000000..487e0f2 --- /dev/null +++ b/landerctl/Makefile @@ -0,0 +1,120 @@ +# https://spin.atomicobject.com/2016/08/26/makefile-c-projects/ was a great +# base for this Makefile + +-include config.mk + +export CFLAGS +export LDFLAGS + +BIN := $(BUILD_DIR)/$(BIN_FILENAME) + +SRCS != find '$(SRC_DIR)' -iname '*.c' + +SRCS_H != find include -iname '*.h' + +OBJS := $(SRCS:%=$(BUILD_DIR)/%.o) $(SRCS_THIRDPARTY:%=$(BUILD_DIR)/%.o) +DEPS := $(SRCS:%=$(BUILD_DIR)/%.d) + +_CFLAGS := $(addprefix 
-I,$(INC_DIRS)) $(CFLAGS) -Wall -Wextra -DLANDER_VERSION=\"$(VERSION)\" +_LDFLAGS := $(addprefix -L,$(LIB_DIRS)) $(addprefix -l,$(LIBS)) $(LDFLAGS) + +.PHONY: all +all: $(BIN) + + +# =====COMPILATION===== +# Utility used by the CI to lint +.PHONY: objs +objs: $(OBJS) + +$(BIN): $(OBJS) + $(CC) -o $@ $(OBJS) $(_LDFLAGS) + +$(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c + mkdir -p $(dir $@) + $(CC) $(_CFLAGS) -c $< -o $@ + +$(BUILD_DIR)/$(THIRDPARTY_DIR)/%.c.o: $(THIRDPARTY_DIR)/%.c + mkdir -p $(dir $@) + $(CC) $(_CFLAGS) -c $< -o $@ + +# =====TESTING===== +.PHONY: run +run: $(BIN) + LANDER_API_KEY=test \ + LANDER_DATA_DIR=data \ + '$(BUILD_DIR)/$(BIN_FILENAME)' + +.PHONY: valgrind +valgrind: $(BIN) + LANDER_API_KEY=test \ + LANDER_DATA_DIR=data \ + valgrind '$(BUILD_DIR)/$(BIN_FILENAME)' + +.PHONY: test +test: $(TARGETS_TEST) + +.PHONY: test-mem +test-mem: $(TARGETS_MEM_TEST) + +.PHONY: $(TARGETS_TEST) +$(TARGETS_TEST): test-%: % + ./$^ + +.PHONY: $(TARGETS_MEM_TEST) +$(TARGETS_MEM_TEST): test-mem-%: % + valgrind --tool=memcheck --error-exitcode=1 --track-origins=yes --leak-check=full ./$^ + +.PHONY: build-test +build-test: $(BINS_TEST) + +$(BINS_TEST): %: %.c.o + $(CC) \ + $^ -o $@ + +# Along with the include directory, each test includes $(TEST_DIR) (which +# contains the acutest.h header file), and the src directory of the module it's +# testing. This allows tests to access internal methods, which aren't publicly +# exposed. 
+$(BUILD_DIR)/$(TEST_DIR)/%.c.o: $(TEST_DIR)/%.c + mkdir -p $(dir $@) + $(CC) $(_CFLAGS) -I$(TEST_DIR) \ + -I$(dir $(@:$(BUILD_DIR)/$(TEST_DIR)/%=$(SRC_DIR)/%)) \ + -c $< -o $@ + + +# =====MAINTENANCE===== +.PHONY: lint +lint: + clang-format -n --Werror $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) + +.PHONY: fmt +fmt: + clang-format -i $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) + +.PHONY: check +check: + mkdir -p $(BUILD_DIR)/cppcheck + cppcheck \ + $(addprefix -I,$(INC_DIRS)) \ + --cppcheck-build-dir=$(BUILD_DIR)/cppcheck \ + --error-exitcode=1 \ + --enable=warning,style \ + --inline-suppr \ + --check-level=exhaustive \ + --quiet \ + -j$(shell nproc) \ + $(SRCS) + +.PHONY: clean +clean: + rm -rf $(BUILD_DIR) + +.PHONY: bear +bear: clean + bear -- make + bear --append -- make build-test + + +# Make make aware of the .d files +-include $(DEPS) diff --git a/landerctl/config.mk b/landerctl/config.mk new file mode 100644 index 0000000..e870a57 --- /dev/null +++ b/landerctl/config.mk @@ -0,0 +1,22 @@ +VERSION := 0.2.0 + +BIN_FILENAME = landerctl + +BUILD_DIR = build +SRC_DIR = src +TEST_DIR = test + +INC_DIRS = include +LIBS ?= magic curl +LIB_DIRS = + +# -MMD: generate a .d file for every source file. This file can be imported by +# make and makes make aware that a header file has been changed, ensuring an +# object file is also recompiled if only a header is changed. 
+# -MP: generate a dummy target for every header file (according to the docs it +# prevents some errors when removing header files) +CFLAGS ?= -MMD -MP -g + +# When compiling release builds, these flags are better +# CFLAGS = -O3 +# LDFLAGS = -flto diff --git a/landerctl/include/landerctl.h b/landerctl/include/landerctl.h new file mode 100644 index 0000000..75da26e --- /dev/null +++ b/landerctl/include/landerctl.h @@ -0,0 +1,60 @@ +#ifndef LANDERCTL +#define LANDERCTL + +#include + +#include + +typedef struct landerctl_cfg { + const char *api_key; + const char *server_url; + const char *ca_certs_bundle; +} landerctl_cfg; + +typedef enum landerctl_cfg_err { + landerctl_cfg_err_ok = 0, + landerctl_cfg_err_not_found, + landerctl_cfg_err_invalid, + landerctl_cfg_err_incomplete, +} landerctl_cfg_err; + +/** + * Try to parse the required config arguments from the config file + * + * @param out config to write values to. Existing values are overwritten + * @param path path to config file + */ +landerctl_cfg_err landerctl_cfg_parse(landerctl_cfg *out, const char *path); + +typedef enum landerctl_mode { + landerctl_mode_none = 0, + landerctl_mode_short, + landerctl_mode_paste, + landerctl_mode_file, +} landerctl_mode; + +typedef enum landerctl_err { + landerctl_err_ok = 0, + landerctl_err_not_found +} landerctl_err; + +typedef struct landerctl_ctx { + landerctl_cfg cfg; + landerctl_mode mode; + bool secure; + bool verbose; + const char *arg; + const char *key; + CURL *curl; + struct curl_slist *headers; + FILE *data_file; +} landerctl_ctx; + +const char *landerctl_err_msg(landerctl_err err); + +void landerctl_set_common(landerctl_ctx *ctx); +landerctl_err landerctl_post_short(landerctl_ctx *ctx); +landerctl_err landerctl_post_paste(landerctl_ctx *ctx); +landerctl_err landerctl_post_file(landerctl_ctx *ctx); + +#endif diff --git a/landerctl/src/cfg_parse.c b/landerctl/src/cfg_parse.c new file mode 100644 index 0000000..a52e792 --- /dev/null +++ b/landerctl/src/cfg_parse.c @@ 
-0,0 +1,119 @@
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "landerctl.h"
+
+// A config line has the form `key = value`; neither the key nor the value may
+// contain spaces
+static const char cfg_line_regex_expr[] = "^([^ ]+) *= *([^ ]+)$";
+
+/**
+ * Parse the config file at `path` and store the values it defines in `out`.
+ *
+ * The value of every recognized key (api_key, server_url, ca_certs_bundle) is
+ * stored as a newly allocated string. Other keys are ignored; if a key occurs
+ * more than once, its last value is used.
+ *
+ * @param out config to fill; the recognized fields are reset to NULL first
+ * @param path path to the config file
+ * @return landerctl_cfg_err_ok on success, landerctl_cfg_err_not_found if the
+ * file can't be opened, landerctl_cfg_err_invalid if a line doesn't have the
+ * `key = value` form, landerctl_cfg_err_incomplete if a key has no value
+ */
+landerctl_cfg_err landerctl_cfg_parse(landerctl_cfg *out, const char *path) {
+  FILE *f = fopen(path, "r");
+
+  if (f == NULL) {
+    return landerctl_cfg_err_not_found;
+  }
+
+  struct {
+    const char *key;
+    const char **var;
+  } key_to_vars[] = {
+      {"api_key", &out->api_key},
+      {"server_url", &out->server_url},
+      {"ca_certs_bundle", &out->ca_certs_bundle},
+  };
+  size_t key_to_vars_len = sizeof(key_to_vars) / sizeof(key_to_vars[0]);
+
+  // We NULL everything beforehand so we can check if we have all needed
+  // variables
+  for (size_t i = 0; i < key_to_vars_len; i++) {
+    *key_to_vars[i].var = NULL;
+  }
+
+  regex_t cfg_line_regex;
+
+  // No error code exists for a failed compilation, so it's reported the same
+  // way as a config we can't parse
+  if (regcomp(&cfg_line_regex, cfg_line_regex_expr, REG_EXTENDED) != 0) {
+    fclose(f);
+
+    return landerctl_cfg_err_invalid;
+  }
+
+  // Accept lines of at most 255 characters. The rest of a longer line is read
+  // as a line of its own, which usually gets rejected as invalid.
+  char line[256];
+  landerctl_cfg_err res = landerctl_cfg_err_ok;
+
+  while (fgets(line, sizeof(line), f) != NULL) {
+    // Last character might be a newline
+    size_t len = strlen(line);
+
+    if (len > 0 && line[len - 1] == '\n') {
+      line[len - 1] = '\0';
+    }
+
+    regmatch_t reg_groups[3];
+
+    if (regexec(&cfg_line_regex, line, 3, reg_groups, 0) != 0) {
+      res = landerctl_cfg_err_invalid;
+
+      break;
+    }
+
+    size_t key_len = reg_groups[1].rm_eo - reg_groups[1].rm_so;
+
+    for (size_t i = 0; i < key_to_vars_len; i++) {
+      if ((key_len == strlen(key_to_vars[i].key)) &&
+          (strncmp(&line[reg_groups[1].rm_so], key_to_vars[i].key, key_len) ==
+           0)) {
+        size_t val_len = reg_groups[2].rm_eo - reg_groups[2].rm_so;
+        char *buf = malloc(val_len + 1);
+
+        // Without memory for the new value, the key keeps its current state;
+        // a key that's still unset afterwards is reported as incomplete
+        if (buf == NULL) {
+          break;
+        }
+
+        memcpy(buf, &line[reg_groups[2].rm_so], val_len);
+        buf[val_len] = '\0';
+
+        // A repeated key replaces its earlier value, which we allocated
+        free((char *)*key_to_vars[i].var);
+        *key_to_vars[i].var = buf;
+        break;
+      }
+    }
+  }
+
+  regfree(&cfg_line_regex);
+
+  if (res == landerctl_cfg_err_ok) {
+    for (size_t i = 0; i < key_to_vars_len; i++) {
+      if (*key_to_vars[i].var == NULL) {
+        res = landerctl_cfg_err_incomplete;
+        break;
+      }
+    }
+  }
+
+  fclose(f);
+
+  return res;
+}
diff --git a/landerctl/src/main.c b/landerctl/src/main.c
new file mode 100644
index 0000000..2e0b375
--- /dev/null
+++ b/landerctl/src/main.c
@@ -0,0 +1,224 @@
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <curl/curl.h>
+#include <magic.h>
+
+#include "landerctl.h"
+
+const char *cfg_file_name = ".landerrc";
+const char *usage = "%s [-SPFsv] arg [key]\n";
+
+/**
+ * Send a short, paste or file request to the configured server and print the
+ * server URL followed by the value of the returned Location header.
+ *
+ * Exit codes: 1 if the config couldn't be loaded, 2 for invalid arguments,
+ * 3 if the server replied with a non-2xx status code, 4 if libcurl reported
+ * an error, 5 if the reply has no Location header, 6 if the payload couldn't
+ * be prepared, 7 for an unknown mode and 255 if libcurl couldn't be
+ * initialized.
+ */
+int main(int argc, char **argv) {
+  landerctl_ctx ctx = {0};
+  char *err_msg = NULL;
+
+  landerctl_cfg_err parse_res;
+
+  const char *home_dir = getenv("HOME");
+
+  if (home_dir == NULL) {
+    parse_res = landerctl_cfg_parse(&ctx.cfg, cfg_file_name);
+  } else {
+    char cfg_path[strlen(home_dir) + strlen(cfg_file_name) + 2];
+    sprintf(cfg_path, "%s/%s", home_dir, cfg_file_name);
+
+    parse_res = landerctl_cfg_parse(&ctx.cfg, cfg_path);
+  }
+
+  switch (parse_res) {
+  case landerctl_cfg_err_ok:
+    break;
+  case landerctl_cfg_err_not_found:
+    err_msg = "Config file not found";
+    break;
+  case landerctl_cfg_err_invalid:
+    err_msg = "Invalid config file";
+    break;
+  case landerctl_cfg_err_incomplete:
+    err_msg = "Incomplete config file";
+    break;
+  }
+
+  if (err_msg != NULL) {
+    fprintf(stderr, "%s\n", err_msg);
+    exit(1);
+  }
+
+  opterr = 0;
+
+  int c;
+
+  while ((c = getopt(argc, argv, "SPFsv")) != -1) {
+    switch (c) {
+    case 'S':
+      ctx.mode = landerctl_mode_short;
+      break;
+    case 'P':
+      ctx.mode = landerctl_mode_paste;
+      break;
+    case 'F':
+      ctx.mode = landerctl_mode_file;
+      break;
+    case 's':
+      ctx.secure = true;
+      break;
+    case 'v':
+      ctx.verbose = true;
+      break;
+    case '?':
+      printf(usage, argv[0]);
+      exit(2);
+    }
+  }
+
+  if (ctx.mode == landerctl_mode_none) {
+    printf("No mode specified.\n\n");
+    printf(usage, argv[0]);
+    exit(2);
+  }
+
+  if (optind == argc || (argc - optind > 2)) {
+    printf(usage, argv[0]);
+    exit(2);
+  }
+
+  ctx.arg = argv[optind];
+  ctx.key = argc - optind == 2 ? argv[optind + 1] : NULL;
+
+  curl_global_init(CURL_GLOBAL_ALL);
+  ctx.curl = curl_easy_init();
+
+  if (ctx.curl == NULL) {
+    exit(255);
+  }
+
+  landerctl_set_common(&ctx);
+  landerctl_err res;
+
+  switch (ctx.mode) {
+  case landerctl_mode_short:
+    res = landerctl_post_short(&ctx);
+    break;
+  case landerctl_mode_paste:
+    res = landerctl_post_paste(&ctx);
+    break;
+  case landerctl_mode_file:
+    res = landerctl_post_file(&ctx);
+    break;
+  default:
+    return 7;
+  }
+
+  if (res != landerctl_err_ok) {
+    printf("%s\n", landerctl_err_msg(res));
+    exit(6);
+  }
+
+  if (ctx.verbose) {
+    curl_easy_setopt(ctx.curl, CURLOPT_VERBOSE, 1L);
+  }
+
+  curl_easy_setopt(ctx.curl, CURLOPT_HTTPHEADER, ctx.headers);
+  curl_easy_setopt(ctx.curl, CURLOPT_CAINFO, ctx.cfg.ca_certs_bundle);
+
+  char curl_err_msg[CURL_ERROR_SIZE];
+  curl_easy_setopt(ctx.curl, CURLOPT_ERRORBUFFER, curl_err_msg);
+
+  int exit_code = 0;
+
+  if (curl_easy_perform(ctx.curl) == CURLE_OK) {
+    long response_code;
+    curl_easy_getinfo(ctx.curl, CURLINFO_RESPONSE_CODE, &response_code);
+
+    if (response_code < 200 || response_code > 299) {
+      fprintf(stderr, "HTTP status code %li\n", response_code);
+      exit_code = 3;
+    } else {
+      struct curl_header *location_header;
+
+      if (curl_easy_header(ctx.curl, "Location", 0, CURLH_HEADER, -1,
+                           &location_header) == CURLHE_OK) {
+        printf("%s%s\n", ctx.cfg.server_url, location_header->value);
+      } else {
+        fprintf(stderr, "Server returned a 2xx without a Location header.\n");
+        exit_code = 5;
+      }
+    }
+  } else {
+    fprintf(stderr, "Libcurl encountered an error: %s\n", curl_err_msg);
+    exit_code = 4;
+  }
+
+  curl_easy_cleanup(ctx.curl);
+  curl_slist_free_all(ctx.headers);
+
+  if (ctx.data_file != NULL) {
+    fclose(ctx.data_file);
+  }
+
+  return exit_code;
+
+  /* struct stat sb; */
+
+  /* stat(argv[1], &sb); */
+
+  /* printf("file size: %lu\n", sb.st_size); */
+
+  /* FILE *f = fopen(argv[1], "rb"); */
+
+  /* if (f == NULL) { */
+  /*   printf("Couldn't open file.\n"); */
+  /*   exit(1); */
+  /* } */
+
+  /* CURL *curl = curl_easy_init(); */
+
+  /* if (curl == NULL) { */
+  /*   exit(1); */
+  /* } */
+
+  /* curl_easy_setopt(curl, CURLOPT_URL, "http://localhost:18080/f/"); */
+  /* curl_easy_setopt(curl, CURLOPT_UPLOAD, 1L); */
+  /* curl_easy_setopt(curl, CURLOPT_READDATA, f); */
+
+  /* curl_off_t file_size = sb.st_size; */
+  /* /1* curl_easy_setopt(curl, CURLOPT_INFILESIZE_LARGE, file_size); *1/ */
+  /* curl_easy_setopt(curl, CURLOPT_POST, 1L); */
+  /* curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE_LARGE, file_size); */
+
+  /* magic_t cookie = magic_open(MAGIC_MIME_TYPE); */
+  /* magic_load(cookie, NULL); */
+  /* const char *mime_type = magic_file(cookie, argv[1]); */
+
+  /* char content_type_header[strlen(mime_type) + 24]; */
+  /* sprintf(content_type_header, "X-Lander-Content-Type: %s", mime_type); */
+
+  /* char content_length_header[32]; */
+  /* sprintf(content_length_header, "Content-Length: %lu", sb.st_size); */
+
+  /* struct curl_slist *list = NULL; */
+  /* list = curl_slist_append(list, content_type_header); */
+  /* list = curl_slist_append(list, content_length_header); */
+  /* list = curl_slist_append(list, "X-Api-Key: test"); */
+
+  /* curl_easy_setopt(curl, CURLOPT_HTTPHEADER, list); */
+
+  /* curl_easy_setopt(curl, CURLOPT_VERBOSE, 1); */
+  /* curl_easy_perform(curl); */
+
+  /* curl_slist_free_all(list); */
+}
diff --git a/landerctl/src/post.c b/landerctl/src/post.c
new file mode 100644
index 0000000..16ebac4
--- /dev/null
+++ b/landerctl/src/post.c
@@ -0,0 +1,194 @@
+#include <libgen.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include <curl/curl.h>
+#include <magic.h>
+
+#include "landerctl.h"
+
+/**
+ * Return the message describing the given error.
+ *
+ * @param err error to describe
+ * @return constant string; empty if the error has no message
+ */
+const char *landerctl_err_msg(landerctl_err err) {
+  switch (err) {
+  case landerctl_err_not_found:
+    return "File not found";
+  default:
+    return "";
+  }
+}
+
+/**
+ * Configure the parts of the request all modes share: the URL, the API key
+ * header and the user agent.
+ *
+ * The URL has the form `<server_url>/<mode>[l]/[key]`, with `<mode>` being
+ * s, p or f, and the l only being present if the secure flag is set. The
+ * header list in `ctx->headers` is started anew.
+ *
+ * @param ctx context to configure; nothing happens if its mode isn't valid
+ */
+void landerctl_set_common(landerctl_ctx *ctx) {
+  // 4 = '/' + mode character + optional 'l' + '/'
+  size_t url_len = strlen(ctx->cfg.server_url) + 4;
+
+  if (ctx->key != NULL) {
+    url_len += strlen(ctx->key);
+  }
+
+  char mode_char;
+
+  switch (ctx->mode) {
+  case landerctl_mode_short:
+    mode_char = 's';
+    break;
+  case landerctl_mode_paste:
+    mode_char = 'p';
+    break;
+  case landerctl_mode_file:
+    mode_char = 'f';
+    break;
+  // Shouldn't be able to happen
+  default:
+    return;
+  }
+
+  char url[url_len + 1];
+
+  if (ctx->key == NULL) {
+    sprintf(url, "%s/%c%s/", ctx->cfg.server_url, mode_char,
+            ctx->secure ? "l" : "");
+  } else {
+    sprintf(url, "%s/%c%s/%s", ctx->cfg.server_url, mode_char,
+            ctx->secure ? "l" : "", ctx->key);
+  }
+
+  curl_easy_setopt(ctx->curl, CURLOPT_URL, url);
+
+  // Add API key header; 12 = length of "X-Api-Key: " + terminating NUL
+  char api_key_header[strlen(ctx->cfg.api_key) + 12];
+  sprintf(api_key_header, "X-Api-Key: %s", ctx->cfg.api_key);
+
+  ctx->headers = curl_slist_append(NULL, api_key_header);
+
+  curl_easy_setopt(ctx->curl, CURLOPT_USERAGENT,
+                   "landerctl/" LANDER_VERSION "");
+}
+
+/**
+ * Open the file at `ctx->arg` and configure the request to send its contents
+ * as the POST body. The opened file is stored in `ctx->data_file`.
+ *
+ * @param ctx context to configure
+ * @return landerctl_err_not_found if the file can't be opened or inspected,
+ * landerctl_err_ok otherwise
+ */
+static landerctl_err landerctl_post_data_file(landerctl_ctx *ctx) {
+  ctx->data_file = fopen(ctx->arg, "rb");
+
+  if (ctx->data_file == NULL) {
+    return landerctl_err_not_found;
+  }
+
+  struct stat sb;
+
+  // Without a valid size, we'd pass garbage to libcurl
+  if (stat(ctx->arg, &sb) != 0) {
+    fclose(ctx->data_file);
+    ctx->data_file = NULL;
+
+    return landerctl_err_not_found;
+  }
+
+  curl_easy_setopt(ctx->curl, CURLOPT_POST, 1L);
+  curl_easy_setopt(ctx->curl, CURLOPT_READDATA, ctx->data_file);
+  // The size is an off_t, which isn't guaranteed to fit in the long that
+  // CURLOPT_POSTFIELDSIZE reads, so the curl_off_t variant is used
+  curl_easy_setopt(ctx->curl, CURLOPT_POSTFIELDSIZE_LARGE,
+                   (curl_off_t)sb.st_size);
+
+  return landerctl_err_ok;
+}
+
+/**
+ * Configure the request to send the string in `ctx->arg` as the POST body.
+ *
+ * @param ctx context to configure
+ * @return always landerctl_err_ok
+ */
+landerctl_err landerctl_post_short(landerctl_ctx *ctx) {
+  // CURLOPT_POSTFIELDSIZE reads a long, not a size_t
+  curl_easy_setopt(ctx->curl, CURLOPT_POSTFIELDSIZE, (long)strlen(ctx->arg));
+  curl_easy_setopt(ctx->curl, CURLOPT_POSTFIELDS, ctx->arg);
+
+  return landerctl_err_ok;
+}
+
+/**
+ * Configure the request to send the contents of the file at `ctx->arg` as the
+ * POST body.
+ *
+ * @param ctx context to configure
+ * @return landerctl_err_not_found if the file can't be opened or inspected,
+ * landerctl_err_ok otherwise
+ */
+landerctl_err landerctl_post_paste(landerctl_ctx *ctx) {
+  return landerctl_post_data_file(ctx);
+}
+
+/**
+ * Same as `landerctl_post_paste`, but also shows libcurl's progress meter and
+ * adds headers describing the file: its name and, if it can be determined, its
+ * mime type.
+ *
+ * @param ctx context to configure
+ * @return landerctl_err_not_found if the file can't be opened or inspected,
+ * landerctl_err_ok otherwise
+ */
+landerctl_err landerctl_post_file(landerctl_ctx *ctx) {
+  landerctl_err res = landerctl_post_data_file(ctx);
+
+  if (res != landerctl_err_ok) {
+    return res;
+  }
+
+  curl_easy_setopt(ctx->curl, CURLOPT_NOPROGRESS, 0L);
+
+  magic_t cookie = magic_open(MAGIC_MIME_TYPE);
+
+  if (cookie != NULL && magic_load(cookie, NULL) == 0) {
+    const char *mime_type = magic_file(cookie, ctx->arg);
+
+    if (mime_type != NULL) {
+      char content_type_header[strlen(mime_type) + 24];
+      sprintf(content_type_header, "X-Lander-Content-Type: %s", mime_type);
+
+      ctx->headers = curl_slist_append(ctx->headers, content_type_header);
+    } else {
+      printf("Couldn't determine mime type; skipping Content-Type header\n");
+    }
+  } else {
+    printf("Couldn't load magic file; skipping Content-Type header\n");
+  }
+
+  // The header list holds its own copy of the mime type by now
+  if (cookie != NULL) {
+    magic_close(cookie);
+  }
+
+  // basename is allowed to modify its argument, so it gets a copy
+  char s[strlen(ctx->arg) + 1];
+  strcpy(s, ctx->arg);
+  const char *base_name = basename(s);
+
+  char filename_header[strlen(base_name) + 20];
+  sprintf(filename_header, "X-Lander-Filename: %s", base_name);
+
+  ctx->headers = curl_slist_append(ctx->headers, filename_header);
+
+  return landerctl_err_ok;
+}
diff --git a/lsm/Makefile b/lsm/Makefile
new file mode 100644
index 0000000..716d82d
--- /dev/null
+++ b/lsm/Makefile
@@ -0,0 +1,127 @@
+# https://spin.atomicobject.com/2016/08/26/makefile-c-projects/ was a great
+# base for this Makefile
+
+-include config.mk
+
+LIB := $(BUILD_DIR)/$(LIB_FILENAME)
+
+SRCS != find '$(SRC_DIR)' -iname '*.c'
+SRCS_H != find $(INC_DIRS) -iname '*.h'
+SRCS_H_INTERNAL != find $(SRC_DIR) -iname '*.h'
+SRCS_TEST != find '$(TEST_DIR)' -iname '*.c'
+SRCS_EXAMPLE != find '$(EXAMPLE_DIR)' -iname '*.c'
+
+OBJS := $(SRCS:%=$(BUILD_DIR)/%.o)
+OBJS_TEST := $(SRCS_TEST:%=$(BUILD_DIR)/%.o)
+OBJS_EXAMPLE := $(SRCS_EXAMPLE:%=$(BUILD_DIR)/%.o)
+DEPS := $(SRCS:%=$(BUILD_DIR)/%.d) $(SRCS_TEST:%=$(BUILD_DIR)/%.d) $(SRCS_EXAMPLE:%=$(BUILD_DIR)/%.d)
+
+BINS_TEST := $(OBJS_TEST:%.c.o=%)
+BINS_EXAMPLE := $(OBJS_EXAMPLE:%.c.o=%)
+TARGETS_TEST := $(BINS_TEST:%=test-%)
+TARGETS_EXAMPLE := $(BINS_EXAMPLE:%=test-%)
+TARGETS_MEM_TEST := $(BINS_TEST:%=test-mem-%)
+
+_CFLAGS := $(addprefix -I,$(INC_DIRS)) $(CFLAGS) -Wall -Wextra
+
+.PHONY: all
+all: lib
+
+
+# =====COMPILATION=====
+# Utility used by the CI to lint
+.PHONY: objs
+objs: $(OBJS)
+
+.PHONY: lib
+lib: $(LIB)
+$(LIB): $(OBJS)
+	ar -rcs $@ $(OBJS)
+
+$(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c + mkdir -p $(dir $@) + $(CC) -c $(_CFLAGS) $< -o $@ + + +# =====TESTING===== +.PHONY: test +test: $(TARGETS_TEST) + +.PHONY: test-mem +test-mem: $(TARGETS_MEM_TEST) + +.PHONY: $(TARGETS_TEST) +$(TARGETS_TEST): test-%: % + ./$^ + +.PHONY: $(TARGETS_MEM_TEST) +$(TARGETS_MEM_TEST): test-mem-%: % + valgrind --tool=memcheck --error-exitcode=1 --track-origins=yes --leak-check=full ./$^ + +.PHONY: build-test +build-test: $(BINS_TEST) + +$(BINS_TEST): %: %.c.o $(LIB) + $(CC) \ + $^ -o $@ + +# Along with the include directory, each test includes $(TEST_DIR) (which +# contains the acutest.h header file), and the src directory of the module it's +# testing. This allows tests to access internal methods, which aren't publicly +# exposed. +$(BUILD_DIR)/$(TEST_DIR)/%.c.o: $(TEST_DIR)/%.c + mkdir -p $(dir $@) + $(CC) $(_CFLAGS) -I$(TEST_DIR) \ + -I$(dir $(@:$(BUILD_DIR)/$(TEST_DIR)/%=$(SRC_DIR)/%)) \ + -c $< -o $@ + +# =====EXAMPLES===== +.PHONY: build-example +build-example: $(BINS_EXAMPLE) + +$(BINS_EXAMPLE): %: %.c.o $(LIB) + $(CC) \ + $^ -o $@ + +# Example binaries link the resulting library +$(BUILD_DIR)/$(EXAMPLE_DIR)/%.c.o: $(EXAMPLE_DIR)/%.c + mkdir -p $(dir $@) + $(CC) $(_CFLAGS) -I$(PUB_INC_DIR) -c $< -o $@ + +# =====MAINTENANCE===== +.PHONY: lint +lint: + clang-format -n --Werror $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) $(SRCS_EXAMPLE) + +.PHONY: fmt +fmt: + clang-format -i $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) $(SRCS_EXAMPLE) + +.PHONY: check +check: + mkdir -p $(BUILD_DIR)/cppcheck + cppcheck \ + $(addprefix -I,$(INC_DIRS)) \ + --cppcheck-build-dir=$(BUILD_DIR)/cppcheck \ + --error-exitcode=1 \ + --enable=warning,style \ + --inline-suppr \ + --check-level=exhaustive \ + --quiet \ + -j$(shell nproc) \ + $(SRCS) + +.PHONY: clean +clean: + rm -rf $(BUILD_DIR) + + +.PHONY: bear +bear: clean + bear -- make + bear --append -- make build-test + bear --append -- make build-example + + +# Make make aware of the .d files +-include 
$(DEPS) diff --git a/lsm/config.mk b/lsm/config.mk new file mode 100644 index 0000000..5e75e26 --- /dev/null +++ b/lsm/config.mk @@ -0,0 +1,15 @@ +LIB_FILENAME = liblsm.a + +BUILD_DIR = build +SRC_DIR = src +TEST_DIR = test +EXAMPLE_DIR = example +PUB_INC_DIR = include +INC_DIRS = $(PUB_INC_DIR) src/_include + +# -MMD: generate a .d file for every source file. This file can be imported by +# make and makes make aware that a header file has been changed, ensuring an +# object file is also recompiled if only a header is changed. +# -MP: generate a dummy target for every header file (according to the docs it +# prevents some errors when removing header files) +CFLAGS ?= -MMD -MP -g diff --git a/lsm/example/test.c b/lsm/example/test.c new file mode 100644 index 0000000..660cd04 --- /dev/null +++ b/lsm/example/test.c @@ -0,0 +1,56 @@ +#include +#include + +#include "lsm.h" +#include "lsm/store.h" +#include "lsm/str.h" + +int main() { + lsm_str *data_dir; + lsm_str_init_copy(&data_dir, "data"); + + lsm_store *store; + assert(lsm_store_load(&store, data_dir) == lsm_error_ok); + + lsm_str *key; + lsm_str_init_copy(&key, "key"); + + lsm_entry_handle *handle; + assert(lsm_store_insert(&handle, store, key) == lsm_error_ok); + + lsm_str *attr; + lsm_str_init_copy(&attr, "some attribute value"); + lsm_entry_attr_insert(handle, 1, attr); + + lsm_str *data; + lsm_str_init_copy(&data, "hello"); + + for (int i = 0; i < 50; i++) { + lsm_entry_data_append(handle, data); + } + lsm_entry_close(handle); + + assert(lsm_store_open_read(&handle, store, key) == lsm_error_ok); + + char buf[24]; + uint64_t read; + uint64_t total = 0; + + lsm_entry_data_read(&read, buf, handle, 24); + total += read; + + while (read > 0) { + printf("%.*s", read, buf); + lsm_entry_data_read(&read, buf, handle, 24); + total += read; + } + printf("\n%lu", total); + + lsm_entry_close(handle); + + assert(lsm_store_open_write(&handle, store, key) == lsm_error_ok); + lsm_entry_remove(handle); + 
lsm_entry_close(handle); + + assert(lsm_store_open_read(&handle, store, key) == lsm_error_not_found); +} diff --git a/lsm/include/lsm.h b/lsm/include/lsm.h new file mode 100644 index 0000000..0a1639f --- /dev/null +++ b/lsm/include/lsm.h @@ -0,0 +1,80 @@ +#ifndef LSM +#define LSM + +#include + +#define LSM_RES(x) \ + { \ + lsm_error res = x; \ + if (res != lsm_error_ok) \ + return res; \ + } + +#define LSM_RES2(x, e) \ + { \ + lsm_error res = x; \ + if (res != lsm_error_ok) { \ + e; \ + return res; \ + } \ + } + +typedef enum lsm_error { + lsm_error_ok = 0, + lsm_error_failed_alloc = 1, + lsm_error_not_found = 2, + lsm_error_already_present = 3, + lsm_error_null_value = 4, + lsm_error_failed_io = 5, + lsm_error_lock_busy = 6, +} lsm_error; + +/*typedef struct lsm_string { */ +/* uint64_t len; */ +/* union { */ +/* void *ptr; */ +/* char val[8]; */ +/* } str; */ +/*} lsm_string; */ + +/*/1** */ +/* * The type of an attribute. Each type is represented as a single bit of a */ +/* * 32-bit integer, so they can be easily combined into a bitmap. */ +/* *1/ */ +/*typedef enum lsm_attr_type { lsm_attr_type_entry_type = 1 << 0 } + * lsm_attr_type; */ + +/*/1** */ +/* * A single attribute associated with an entry */ +/* *1/ */ +/*typedef struct lsm_attr { */ +/* lsm_attr_type type; */ +/* lsm_string str; */ +/*} lsm_attr; */ + +/*/1** */ +/* * Represents a collection of attributes for an entry. A collection can only + */ +/* * contain one of each attribute. */ +/* *1/ */ +/*typedef struct lsm_attr_list { */ +/* uint64_t count; */ +/* lsm_attr *items; */ +/* uint32_t bitmap; */ +/*} lsm_attr_list; */ + +/*/1** */ +/* * An entry inside an LSM store */ +/* *1/ */ +/*typedef struct lsm_entry { */ +/* lsm_string key; */ +/* lsm_attr_list attrs; */ +/* lsm_string data; */ +/*} lsm_entry; */ + +/*/1** */ +/* * A store of entries, which manages its data both in-memory and on disk. 
*/ +/* *1/ */ +/*typedef struct lsm_store lsm_store; */ + +#endif diff --git a/lsm/include/lsm/bt.h b/lsm/include/lsm/bt.h new file mode 100644 index 0000000..581288f --- /dev/null +++ b/lsm/include/lsm/bt.h @@ -0,0 +1,71 @@ +#ifndef LSM_BT +#define LSM_BT + +#include "lsm.h" + +/** + * A binary tree implementation using char values as keys + */ +typedef struct lsm_bt lsm_bt; + +/** + * Initialize a new binary tree + * + * @param ptr where to store newly allocated pointer + */ +lsm_error lsm_bt_init(lsm_bt **ptr); + +/** + * Deallocate an entire binary tree, including all its nodes + */ +void lsm_bt_free(lsm_bt *bt); + +/** + * Remove the binary tree's entire contents, but keep the struct allocated. + */ +void lsm_bt_clear(lsm_bt *bt); + +/** + * Return the size of the binary tree + */ +uint64_t lsm_bt_size(const lsm_bt *bt); + +/** + * Search for the data stored behind the given key. + * + * @param out pointer to store data pointer in + * @param bt binary tree to search + * @param key key to search + */ +lsm_error lsm_bt_search(void **out, const lsm_bt *bt, char key); + +/** + * Insert a new data value into the tree with the given key. + * + * @param bt binary tree to insert into + * @param key key to insert + * @param data data to store + */ +lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data); + +/** + * Remove the given key from the binary tree. Ownership of the data pointer is + * returned to the caller. + * + * @param out address to write data pointer to + * @param bt binary tree to remove from + * @param key key to remove + */ +lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key); + +/** + * Replace the data at an existing key with new data, returning the old. 
+ * + * @param out address to write old data pointer to + * @param bt binary tree to replace in + * @param key key to replace at + * @param data new data to store + */ +lsm_error lsm_bt_replace(void **out, lsm_bt *bt, char key, void *data); + +#endif diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h new file mode 100644 index 0000000..c680c43 --- /dev/null +++ b/lsm/include/lsm/store.h @@ -0,0 +1,231 @@ +#ifndef LSM_STORE +#define LSM_STORE + +#include +#include + +#include "lsm.h" +#include "lsm/str.h" + +#define LSM_STORE_DATA_LEVELS 3 + +/** + * A handle referencing an entry inside a store. Read/write operations from/to + * the entry go through this handle. + */ +typedef struct lsm_entry_handle lsm_entry_handle; + +/** + * Checks whether the entry has an attribute with the specified type. + * + * @param entry entry to check + * @param type type of attribute to check for + */ +bool lsm_entry_attr_present(lsm_entry_handle *handle, uint8_t type); + +/** + * Retrieve the contents of an attribute from an entry, if present + * + * @param out where to store pointer to attribute data + * @param entry entry to search for + * @param type type of attribute to return + */ +lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, + uint8_t type); + +/** + * Convenience wrapper around `lsm_entry_attr_get` that can be used if we know + * beforehand the attribute value is a 64-bit number. + * + * @param out where to store attribute data + * @param entry entry to search for + * @param type type of attribute to return + */ +lsm_error lsm_entry_attr_get_uint64_t(uint64_t *out, lsm_entry_handle *handle, + uint8_t type); + +/** + * Convenience wrapper around `lsm_entry_attr_get` that can be used if we know + * beforehand the attribute value is an 8-bit number. 
+ * + * @param out where to store attribute data + * @param entry entry to search for + * @param type type of attribute to return + */ +lsm_error lsm_entry_attr_get_uint8_t(uint8_t *out, lsm_entry_handle *handle, + uint8_t type); + +/** + * Add a new attribute to the entry. + * + * @param entry entry to modify + * @param type type of attribute to add + * @param data data of attribute; ownership of pointer is taken over + */ +lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint8_t type, + lsm_str *data); + +/** + * Convenience wrapper around `lsm_entry_attr_insert` that can be used if the + * data to be stored is a 64-bit number. + * + * @param entry entry to modify + * @param type type of attribute to add + * @param data data of attribute + */ +lsm_error lsm_entry_attr_insert_uint64_t(lsm_entry_handle *handle, uint8_t type, + uint64_t data); + +/** + * Convenience wrapper around `lsm_entry_attr_insert` that can be used if the + * data to be stored is an 8-bit number. + * + * @param entry entry to modify + * @param type type of attribute to add + * @param data data of attribute + */ +lsm_error lsm_entry_attr_insert_uint8_t(lsm_entry_handle *handle, uint8_t type, + uint8_t data); + +/** + * Remove an attribute from the given entry, if present. + * + * @param out pointer to store removed data pointer in. If NULL, data pointer + * will be leaked. + * @param entry entry to remove attribute from + * @param type type of attribute to remove + */ +lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, + uint8_t type); + +/** + * A store consisting of LSM entries. + * + * A store manages both an in-memory data structure for quick lookup, and a + * database file for persistent storage of the contained entries. + */ +typedef struct lsm_store lsm_store; + +/** + * Allocate and initialize a new lsm_store object.
+ * + * @param ptr where to store newly allocated pointer + */ +lsm_error lsm_store_init(lsm_store **ptr); + +/** + * Return how many elements are stored in the trie. + * + * @param store store to use + * @return how many elements are in the store + */ +uint64_t lsm_store_size(const lsm_store *store); + +/** + * Open the given database file and load it into a new store object. + * + * @param ptr pointer to store newly allocated store + * @param data_path path to the data directory + */ +lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path); + +/** + * Deallocate an existing lsm_store object. + * + * @param store object to deallocate + */ +void lsm_store_free(lsm_store *store); + +/** + * Open a read handle to the given entry. This entry must be properly closed + * using `lsm_entry_close`. + * + * @param out pointer to store handle pointer + * @param store store to retrieve entry from + * @param key key to search + */ +lsm_error lsm_store_open_read(lsm_entry_handle **out, lsm_store *store, + const lsm_str *key); + +/** + * Open a write handle to the given entry. This handle should only be used for + * writing; read operations on this handle are unsupported. This entry must be + * properly closed using `lsm_entry_close`. + * + * @param out pointer to store handle pointer + * @param store store to retrieve entry from + * @param key key to search + */ +lsm_error lsm_store_open_write(lsm_entry_handle **out, lsm_store *store, + const lsm_str *key); + +/** + * Close an open entry handle. + * + * @param store store the handle's entry is stored in + * @param handle handle to close + */ +void lsm_entry_close(lsm_entry_handle *handle); + +/** + * Insert a new entry into the store, returning a write handle to the newly + * created entry.
+ * + * @param out pointer to store new entry pointer in + * @param store store to modify + * @param key key to add; ownership of key pointer is taken over + */ +lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, + lsm_str *key); + +/** + * Mark the entry as removed. + * + * @param handle handle to entry to remove + */ +void lsm_entry_remove(lsm_entry_handle *handle); + +/** + * Append new data to the given entry, which is expected to be in the store. + * + * This function will append either to disk or to memory, depending on the + * length of the entry's data. + * + * @param store store the entry is stored in + * @param entry entry to append data to + * @param data data to append + */ +lsm_error lsm_entry_data_append(lsm_entry_handle *handle, const lsm_str *data); + +/** + * Same as `lsm_entry_data_append`, except that it takes a direct char array. + * + * @param store store the entry is stored in + * @param entry entry to append data to + * @param data data to append + * @param len length of data array + */ +lsm_error lsm_entry_data_append_raw(lsm_entry_handle *handle, char *data, + uint64_t len); + +/** + * Read a number of bytes from the entry's data field. The position from which + * data is read is dependent on previous read calls. + * + * @param out where to write how many bytes were read + * @param buf buffer to store read data in + * @param handle entry handle to read from + * @param len how many bytes to read at most + */ +lsm_error lsm_entry_data_read(uint64_t *out, char *buf, + lsm_entry_handle *handle, uint64_t len); + +/** + * Return the length of the entry's data. 
+ * + * @param handle entry handle to return length for + * @return length of the data + */ +uint64_t lsm_entry_data_len(lsm_entry_handle *handle); + +#endif diff --git a/lsm/include/lsm/str.h b/lsm/include/lsm/str.h new file mode 100644 index 0000000..b3cf965 --- /dev/null +++ b/lsm/include/lsm/str.h @@ -0,0 +1,183 @@ +#ifndef LSM_STR +#define LSM_STR + +#include + +#include "lsm.h" + +/** + * Represents a string (or really any kind of data) with a known length. Data + * with length 8 or less is stored inside the pointer, and does not allocate + * additional memory. + */ +typedef struct lsm_str lsm_str; + +/** + * Allocate and initialize a new lsm_str object + * + * @param ptr pointer to store newly allocated pointer + * @param s string to convert into lsm string; ownership is taken over + */ +lsm_error lsm_str_init(lsm_str **ptr, char *s); + +/** + * Allocate a new string struct of length 0. + * + * @param ptr pointer to store newly allocated pointer in + */ +lsm_error lsm_str_init_zero(lsm_str **ptr); + +/** + * Allocate and initialize a new lsm_str object, but copy the original string + * instead of taking over ownership, leaving the original string untouched. + * + * @param ptr pointer to store newly allocated pointer + * @param s string to copy into lsm string + */ +lsm_error lsm_str_init_copy(lsm_str **ptr, const char *s); + +/** + * Same as `lsm_str_init_copy`, except that it takes an additional argument + * specifying the length of the string to copy over. This can be used to more + * easily "cut" parts of a C-style string out into an LSM string. + * + * @param ptr pointer to store newly allocated pointer + * @param s string to copy into lsm string + * @param len length of string to copy + */ +lsm_error lsm_str_init_copy_n(lsm_str **ptr, const char *s, uint64_t len); + +/** + * Overwrite an existing lsm_str so it now represents the new provided string. + * The string pointer of the original object is free'd if needed. 
Ownership of + * the pointer is taken over. + * + * @param str lsm_str object to modify + * @param s string to convert into lsm string; ownership is taken over + */ +void lsm_str_overwrite(lsm_str *str, char *s); + +/** + * Overwrite an existing lsm_str so it now represents the new provided string. + * The string pointer of the original object is free'd if needed. The provided + * string is copied, leaving the original untouched. + * + * @param str lsm_str object to modify + * @param s string to copy into lsm string + */ +lsm_error lsm_str_overwrite_copy(lsm_str *str, const char *s); + +/** + * Same as `lsm_str_overwrite_copy`, except the length is explicitly specified, + * allowing you to easily "cut" parts of a C string out into an LSM string. + * + * @param str lsm_str object to modify + * @param s string to copy into lsm string + * @param len length of the string to copy + */ +lsm_error lsm_str_overwrite_copy_n(lsm_str *str, const char *s, uint64_t len); + +/** + * Deallocate the existing internal string if needed and replace the lsm_str + * with a string of length 0, wiping its contents. This function can be used as + * a substitute for lsm_str_free for stack-allocated structs. + * + * @param str string to wipe + */ +void lsm_str_zero(lsm_str *str); + +/** + * Deallocate the string and its internal char buffer if needed. Only call this + * on heap-allocated strings. + * + * @param str string to deallocate + */ +void lsm_str_free(lsm_str *str); + +/** + * Return the length of the string. + * + * @param str string to return length for. + */ +uint64_t lsm_str_len(const lsm_str *str); + +/** + * Return a pointer to the string's underlying char array. Note that this array + * will *not* necessarily be null-terminated. + * + * @param str string to return pointer for + */ +const char *lsm_str_ptr(const lsm_str *str); + +/** + * Returns the character at the specified position.
+ * + * @param index index of character to return + */ +char lsm_str_char(const lsm_str *str, uint64_t index); + +/** + * Take a substring and copy it to a provided string object. + * + * @param out string to store new substring in. The contents of this string will + * be replaced. This string is assumed to be uninitialized, so zero this string + * manually if you're overwriting an existing string. + * @param str string to take substring from + * @param start inclusive start index for the substring. If this is greater than + * or equal to the string's length, out will be a zero-length string. + * @param end exclusive end index for the substring + */ +lsm_error lsm_str_substr(lsm_str *out, const lsm_str *str, uint64_t start, + uint64_t end); + +/** + * Return the first index where s1 and s2 differ, starting at their respective + * offsets. If both strings are equal (or one is a prefix of the other), the + * result will be the length of the shortest string. The returned value is + * relative to the given offsets. + * + * @param s1 string to compare + * @param s1_offset offset inside s1 to start comparing from + * @param s2 string to compare s1 to + * @param s2_offset offset inside s2 to start comparing from + */ +uint64_t lsm_str_cmp(const lsm_str *s1, uint64_t s1_offset, const lsm_str *s2, + uint64_t s2_offset); + +/** + * Checks whether the two strings are identical. + * + * @param s1 first string to compare + * @param s2 second string to compare + * @return true if their values are equal, false otherwise + */ +bool lsm_str_eq(const lsm_str *s1, const lsm_str *s2); + +/** + * Truncate an already initialized string in-place. + * + * @param s string to truncate + * @param new_len new length of the string. If new_len is >= the original + * length, this function does nothing. + */ +lsm_error lsm_str_truncate(lsm_str *s, uint64_t new_len); + +/** + * Split s at the specified index, saving the second half of the string in s2.
+ * + * @param s string to split + * @param s2 string to store second part of s + * @param index position to split string. If index is the length of s or + * greater, s2 will simply be an empty string. + */ +lsm_error lsm_str_split(lsm_str *s, lsm_str *s2, uint64_t index); + +/** + * Append s2 to s. s2 is left untouched. + * + * @param s string to append s2 to + * @param s2 string to append to s + */ +lsm_error lsm_str_append(lsm_str *s, lsm_str *s2); + +#endif diff --git a/lsm/include/lsm/trie.h b/lsm/include/lsm/trie.h new file mode 100644 index 0000000..58db0b6 --- /dev/null +++ b/lsm/include/lsm/trie.h @@ -0,0 +1,60 @@ +#ifndef LSM_TRIE +#define LSM_TRIE + +#include "lsm.h" +#include "lsm/str.h" + +/** + * A struct representing a trie + */ +typedef struct lsm_trie lsm_trie; + +/** + * Initialize a new trie. + * + * @param ptr where to store the newly allocated pointer + */ +lsm_error lsm_trie_init(lsm_trie **ptr); + +/** + * Deallocate an entire trie, including all its nodes + * + * @param trie trie to free + */ +void lsm_trie_free(lsm_trie *trie); + +/** + * Insert a new element into the trie using the specified key. + * + * @param trie trie to insert into + * @param key key to insert data with + * @param data data to insert + */ +lsm_error lsm_trie_insert(lsm_trie *trie, const lsm_str *key, void *data); + +/** + * Search for an element in the trie. + * + * @param out where to store data pointer, if present + * @param trie trie to search in + * @param key key to search with + */ +lsm_error lsm_trie_search(void **out, const lsm_trie *trie, const lsm_str *key); + +/** + * Remove an element from the trie. + * + * @param out where to store the removed data pointer, if present. 
+ * @param trie trie to remove from + * @param key key to remove + */ +lsm_error lsm_trie_remove(void **out, lsm_trie *trie, const lsm_str *key); + +/** + * Return the size of a trie + * + * @param trie trie to return size for + */ +uint64_t lsm_trie_size(const lsm_trie *trie); + +#endif diff --git a/lsm/src/_include/lsm/bt_internal.h b/lsm/src/_include/lsm/bt_internal.h new file mode 100644 index 0000000..4b55771 --- /dev/null +++ b/lsm/src/_include/lsm/bt_internal.h @@ -0,0 +1,38 @@ +#ifndef LSM_BT_INTERNAL +#define LSM_BT_INTERNAL + +#include + +#include "lsm.h" +#include "lsm/bt.h" + +/** + * Node inside a binary tree + */ +typedef struct lsm_bt_node { + struct lsm_bt_node *left; + struct lsm_bt_node *right; + void *data; + char key; +} lsm_bt_node; + +/** + * Initialize a new binary tree node + * + * @param ptr where to store newly allocated pointer + * @param key key for the node + * @param data data to store + */ +lsm_error lsm_bt_node_init(lsm_bt_node **ptr, const char key, void *data); + +/** + * Deallocate a single binary tree node + */ +void lsm_bt_node_free(lsm_bt_node *node); + +struct lsm_bt { + lsm_bt_node *root; + uint8_t size; +}; + +#endif diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h new file mode 100644 index 0000000..e446a0d --- /dev/null +++ b/lsm/src/_include/lsm/store_internal.h @@ -0,0 +1,159 @@ +#ifndef LSM_STORE_INTERNAL +#define LSM_STORE_INTERNAL + +#include +#include + +#include "lsm/store.h" +#include "lsm/str_internal.h" +#include "lsm/trie.h" + +#define LSM_DB_FILE_NAME "lsm.db" +#define LSM_IDX_FILE_NAME "lsm.idx" +#define LSM_DATA_FILE_SUFFIX ".data" + +typedef struct lsm_attr { + uint8_t type; + lsm_str *str; +} lsm_attr; + +/** + * An entry inside an LSM store. + * + * Each entry consists of the key it's stored behind, zero or more attributes + * (metadata) and a data file. 
+ */ +typedef struct lsm_entry { + lsm_str *key; + struct { + uint64_t bitmap[4]; + uint8_t count; + lsm_attr *items; + } attrs; + uint64_t data_len; + uint64_t idx_file_offset; +} lsm_entry; + +/** + * Allocate and initialize a new lsm_entry object. + * + * @param ptr where to store newly allocated pointer + */ +lsm_error lsm_entry_init(lsm_entry **ptr); + +/** + * Deallocate an existing entry + * + * @param entry pointer to entry + */ +void lsm_entry_free(lsm_entry *entry); + +/** + * Deallocate an existing lsm_entry object. + * + * @param entry object to deallocate + */ +void lsm_entry_free(lsm_entry *entry); + +typedef struct lsm_entry_wrapper { + pthread_rwlock_t lock; + lsm_entry *entry; +} lsm_entry_wrapper; + +lsm_error lsm_entry_wrapper_init(lsm_entry_wrapper **ptr); +void lsm_entry_wrapper_free(lsm_entry_wrapper *wrapper); + +typedef enum lsm_entry_handle_state : uint8_t { + lsm_entry_handle_state_new = 1 << 0, + lsm_entry_handle_state_updated = 1 << 1, + lsm_entry_handle_state_removed = 1 << 2, +} lsm_entry_handle_state; + +struct lsm_entry_handle { + lsm_entry_wrapper *wrapper; + lsm_store *store; + // Either read or append, depending on how it was opened + FILE *f; + // Current position in the file pointer + uint64_t pos; + // Required to determine in what way the database files need to be synced + uint64_t states; +}; + +lsm_error lsm_entry_handle_init(lsm_entry_handle **out); + +struct lsm_store { + lsm_trie *trie; + lsm_str *data_path; + + struct { + FILE *f; + uint64_t size; + pthread_mutex_t lock; + } db; + + struct { + FILE *f; + uint64_t size; + uint64_t block_count; + pthread_mutex_t lock; + } idx; +}; + +/** + * Read in the database and construct the in-memory trie index. This function + * assumes the provided store is a newly initialized empty store with the + * database files opened. + * + * @param store store to read + */ +lsm_error lsm_store_load_db(lsm_store *store); + +/** + * Write a new insert to the database. 
+ * + * @param handle handle to added entry + */ +lsm_error lsm_entry_disk_insert(lsm_entry_handle *handle); + +/** + * Remove an entry from the database. + * + * @param handle handle to the removed entry + */ +lsm_error lsm_entry_disk_remove(lsm_entry_handle *handle); + +/** + * Update an existing entry already in the store. + * + * @param handle to updated entry + */ +lsm_error lsm_entry_disk_update(lsm_entry_handle *handle); + +/** + * Return the length of the path to this entry's data file + */ +uint64_t lsm_entry_data_path_len(const lsm_entry_handle *handle); + +/** + * Fill in the entry's data file path in the provided buffer. Use + * `lsm_entry_data_path_len` to allocate an appropriately-sized buffer + */ +void lsm_entry_data_path(char *buf, const lsm_entry_handle *handle); + +/** + * Open the entry's data file for reading + * + * @param handle handle to the entry + */ +lsm_error lsm_entry_data_open_read(lsm_entry_handle *handle); + +/** + * Open the entry's data file for writing. The file and all subdirectories in + * the data dir are created as needed. 
+ * + * @param handle handle to the entry + */ +lsm_error lsm_entry_data_open_write(lsm_entry_handle *handle); + +#endif diff --git a/lsm/src/_include/lsm/str_internal.h b/lsm/src/_include/lsm/str_internal.h new file mode 100644 index 0000000..03f5395 --- /dev/null +++ b/lsm/src/_include/lsm/str_internal.h @@ -0,0 +1,16 @@ +#ifndef LSM_STR_INTERNAL +#define LSM_STR_INTERNAL + +#include + +#include "lsm/str.h" + +struct lsm_str { + uint64_t len; + union { + char *ptr; + char val[8]; + } data; +}; + +#endif diff --git a/lsm/src/_include/lsm/trie_internal.h b/lsm/src/_include/lsm/trie_internal.h new file mode 100644 index 0000000..e3526d9 --- /dev/null +++ b/lsm/src/_include/lsm/trie_internal.h @@ -0,0 +1,36 @@ +#ifndef LSM_TRIE_INTERNAL +#define LSM_TRIE_INTERNAL + +#include "lsm/bt_internal.h" +#include "lsm/str_internal.h" +#include "lsm/trie.h" + +/** + * A node inside a trie structure + */ +typedef struct lsm_trie_node { + lsm_bt bt; + lsm_str skip; + void *data; +} lsm_trie_node; + +/** + * Allocate and initialize a new trie node + * + * @param ptr pointer to store new node pointer + */ +lsm_error lsm_trie_node_init(lsm_trie_node **ptr); + +/** + * Deallocate a trie node + * + * @param node node to deallocate + */ +void lsm_trie_node_free(lsm_trie_node *node); + +struct lsm_trie { + lsm_trie_node *root; + uint64_t size; +}; + +#endif diff --git a/lsm/src/bt/lsm_bt.c b/lsm/src/bt/lsm_bt.c new file mode 100644 index 0000000..c09fa01 --- /dev/null +++ b/lsm/src/bt/lsm_bt.c @@ -0,0 +1,158 @@ +#include + +#include "lsm/bt_internal.h" + +lsm_error lsm_bt_node_init(lsm_bt_node **ptr, const char key, void *data) { + lsm_bt_node *node = calloc(1, sizeof(lsm_bt_node)); + + if (node == NULL) { + return lsm_error_failed_alloc; + } + + node->key = key; + node->data = data; + *ptr = node; + + return lsm_error_ok; +} + +void lsm_bt_node_free(lsm_bt_node *node) { free(node); } + +void lsm_bt_node_free_tree(lsm_bt_node *node) { + if (node->left != NULL) { + 
lsm_bt_node_free_tree(node->left); + lsm_bt_node_free(node->left); + } + + if (node->right != NULL) { + lsm_bt_node_free_tree(node->right); + lsm_bt_node_free(node->right); + } +} + +lsm_error lsm_bt_init(lsm_bt **ptr) { + lsm_bt *bt = calloc(1, sizeof(lsm_bt)); + + if (bt == NULL) { + return lsm_error_failed_alloc; + } + + *ptr = bt; + + return lsm_error_ok; +} + +void lsm_bt_clear(lsm_bt *bt) { + if (bt->root != NULL) { + lsm_bt_node_free_tree(bt->root); + lsm_bt_node_free(bt->root); + + bt->root = NULL; + bt->size = 0; + } +} + +void lsm_bt_free(lsm_bt *bt) { + lsm_bt_clear(bt); + free(bt); +} + +uint64_t lsm_bt_size(const lsm_bt *bt) { return bt->size; } + +lsm_error lsm_bt_insert(lsm_bt *bt, char key, void *data) { + lsm_bt_node **dest = &bt->root; + + // Traverse down the tree until we reach the new point to insert our node + while ((*dest != NULL) && ((*dest)->key != key)) { + dest = key < (*dest)->key ? &(*dest)->left : &(*dest)->right; + } + + if (*dest != NULL) { + return lsm_error_already_present; + } + + if (lsm_bt_node_init(dest, key, data) != lsm_error_ok) { + return lsm_error_failed_alloc; + } + + bt->size++; + + return lsm_error_ok; +} + +lsm_error lsm_bt_search(void **out, const lsm_bt *bt, char key) { + lsm_bt_node *node = bt->root; + + while ((node != NULL) && (node->key != key)) { + node = key < node->key ? node->left : node->right; + } + + if (node == NULL) { + return lsm_error_not_found; + } + + if (out != NULL) { + *out = node->data; + } + + return lsm_error_ok; +} + +lsm_error lsm_bt_remove(void **out, lsm_bt *bt, char key) { + lsm_bt_node **dest = &bt->root; + + while ((*dest != NULL) && ((*dest)->key != key)) { + dest = key < (*dest)->key ? 
&(*dest)->left : &(*dest)->right; + } + + if (*dest == NULL) { + return lsm_error_not_found; + } + + if (out != NULL) { + *out = (*dest)->data; + } + bt->size--; + + if (((*dest)->left != NULL) && ((*dest)->right != NULL)) { + lsm_bt_node **succ = &(*dest)->right; + + while ((*succ)->left != NULL) { + succ = &(*succ)->left; + } + + (*dest)->key = (*succ)->key; + (*dest)->data = (*succ)->data; + + lsm_bt_node *succ_replacement = (*succ)->right; + lsm_bt_node_free(*succ); + *succ = succ_replacement; + } else { + lsm_bt_node *replacement = + (*dest)->left != NULL ? (*dest)->left : (*dest)->right; + lsm_bt_node_free(*dest); + *dest = replacement; + } + + return lsm_error_ok; +} + +lsm_error lsm_bt_replace(void **out, lsm_bt *bt, char key, void *data) { + lsm_bt_node *node = bt->root; + + while ((node != NULL) && (node->key != key)) { + node = key < node->key ? node->left : node->right; + } + + if (node == NULL) { + return lsm_error_not_found; + } + + if (out != NULL) { + *out = node->data; + } + + node->data = data; + + return lsm_error_ok; +} diff --git a/lsm/src/store/lsm_store.c b/lsm/src/store/lsm_store.c new file mode 100644 index 0000000..7b3ed36 --- /dev/null +++ b/lsm/src/store/lsm_store.c @@ -0,0 +1,200 @@ +#include +#include +#include + +#include "lsm.h" +#include "lsm/store.h" +#include "lsm/store_internal.h" +#include "lsm/trie.h" + +lsm_error lsm_store_init(lsm_store **ptr) { + lsm_store *store = calloc(1, sizeof(lsm_store)); + + if (store == NULL) { + return lsm_error_failed_alloc; + } + + LSM_RES2(lsm_trie_init(&store->trie), free(store)); + + pthread_mutex_init(&store->db.lock, NULL); + pthread_mutex_init(&store->idx.lock, NULL); + + *ptr = store; + + return lsm_error_ok; +} + +uint64_t lsm_store_size(const lsm_store *store) { + return lsm_trie_size(store->trie); +} + +lsm_error lsm_store_open_read(lsm_entry_handle **out, lsm_store *store, + const lsm_str *key) { + lsm_entry_wrapper *wrapper; + + LSM_RES(lsm_trie_search((void **)&wrapper, store->trie, 
key)); + + // Try to get a read lock on the entry's lock + if (pthread_rwlock_tryrdlock(&wrapper->lock) != 0) { + return lsm_error_lock_busy; + } + + // While the trie's data field will never be NULL, the actual entry pointer + // might be + if (wrapper->entry == NULL) { + pthread_rwlock_unlock(&wrapper->lock); + + return lsm_error_not_found; + } + + lsm_entry_handle *handle; + LSM_RES2(lsm_entry_handle_init(&handle), + pthread_rwlock_unlock(&wrapper->lock)); + + handle->wrapper = wrapper; + handle->store = store; + *out = handle; + + return lsm_error_ok; +} + +lsm_error lsm_store_open_write(lsm_entry_handle **out, lsm_store *store, + const lsm_str *key) { + lsm_entry_wrapper *wrapper; + LSM_RES(lsm_trie_search((void **)&wrapper, store->trie, key)); + + // Try to get a write lock on the entry's lock + // TODO make this timeout to not block + if (pthread_rwlock_wrlock(&wrapper->lock) != 0) { + return lsm_error_lock_busy; + } + + // While the trie's data field will never be NULL, the actual entry pointer + // might be + if (wrapper->entry == NULL) { + pthread_rwlock_unlock(&wrapper->lock); + + return lsm_error_not_found; + } + + lsm_entry_handle *handle; + LSM_RES2(lsm_entry_handle_init(&handle), + pthread_rwlock_unlock(&wrapper->lock)); + + handle->wrapper = wrapper; + handle->store = store; + *out = handle; + + return lsm_error_ok; +} + +lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store, + lsm_str *key) { + // TODO what happens when two inserts to the same key happen at the same time? 
+ lsm_entry_wrapper *wrapper; + + // If a key was previously removed from the trie, the wrapper will already be + // present in the trie + if (lsm_trie_search((void **)&wrapper, store->trie, key) == + lsm_error_not_found) { + LSM_RES(lsm_entry_wrapper_init(&wrapper)); + + pthread_rwlock_wrlock(&wrapper->lock); + + LSM_RES2(lsm_trie_insert(store->trie, key, wrapper), + lsm_entry_wrapper_free(wrapper)); + } else { + pthread_rwlock_wrlock(&wrapper->lock); + + if (wrapper->entry != NULL) { + pthread_rwlock_unlock(&wrapper->lock); + + return lsm_error_already_present; + } + } + + lsm_entry *entry; + LSM_RES2(lsm_entry_init(&entry), pthread_rwlock_unlock(&wrapper->lock)); + + entry->key = key; + wrapper->entry = entry; + + lsm_entry_handle *handle; + LSM_RES2(lsm_entry_handle_init(&handle), + pthread_rwlock_unlock(&wrapper->lock)); + + // No need to set the handle's file, as the entry doesn't have any data yet + handle->wrapper = wrapper; + handle->store = store; + + // Newly inserted entries are always dirty + handle->states |= lsm_entry_handle_state_new; + + *out = handle; + + return lsm_error_ok; +} + +void lsm_entry_remove(lsm_entry_handle *handle) { + handle->states |= lsm_entry_handle_state_removed; +} + +lsm_error lsm_entry_data_append(lsm_entry_handle *handle, const lsm_str *data) { + if (lsm_str_len(data) == 0) { + return lsm_error_ok; + } + + lsm_entry *entry = handle->wrapper->entry; + + uint64_t new_len = entry->data_len + lsm_str_len(data); + const char *data_s = lsm_str_ptr(data); + + // Entries don't open their file unless needed + if (handle->f == NULL) { + // An entry with no existing data will not have a data file yet, so we set + // create to true then + LSM_RES(lsm_entry_data_open_write(handle)); + } + + size_t written = 0; + + // TODO what happens when I/O fails? 
+ while (written < data->len) { + written += + fwrite(&data_s[written], sizeof(char), data->len - written, handle->f); + } + + entry->data_len = new_len; + handle->states |= lsm_entry_handle_state_updated; + + return lsm_error_ok; +} + +lsm_error lsm_entry_data_read(uint64_t *out, char *buf, + lsm_entry_handle *handle, uint64_t len) { + const lsm_entry *entry = handle->wrapper->entry; + + if (entry->data_len == 0) { + *out = 0; + + return lsm_error_ok; + } + + // Entries don't open their file unless needed + if (handle->f == NULL) { + LSM_RES(lsm_entry_data_open_read(handle)); + } + + uint64_t read; + + read = fread(buf, sizeof(char), len, handle->f); + + if ((read == 0) && (ferror(handle->f) != 0)) { + return lsm_error_failed_io; + } + + handle->pos += read; + *out = read; + + return lsm_error_ok; +} diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c new file mode 100644 index 0000000..721b4f3 --- /dev/null +++ b/lsm/src/store/lsm_store_disk_read.c @@ -0,0 +1,207 @@ +#include +#include + +#include "lsm.h" +#include "lsm/store_internal.h" + +lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { + lsm_store *store; + LSM_RES(lsm_store_init(&store)); + + // Try to open an existing db file or create a new one otherwise + // This shit is why I need to improve the str library + char db_file_path[lsm_str_len(data_path) + strlen(LSM_DB_FILE_NAME) + 2]; + memcpy(db_file_path, lsm_str_ptr(data_path), + lsm_str_len(data_path) * sizeof(char)); + sprintf(&db_file_path[lsm_str_len(data_path)], "/%s", LSM_DB_FILE_NAME); + + FILE *db_file = fopen(db_file_path, "r+b"); + + if (db_file == NULL) { + // Create the file first, then reopen it in extended read + db_file = fopen(db_file_path, "wb"); + + if (db_file == NULL) { + return lsm_error_failed_io; + } + + fclose(db_file); + + db_file = fopen(db_file_path, "r+b"); + + if (db_file == NULL) { + return lsm_error_failed_io; + } + } + + // Same for idx file + char 
idx_file_path[lsm_str_len(data_path) + strlen(LSM_IDX_FILE_NAME) + 2];
+  memcpy(idx_file_path, lsm_str_ptr(data_path),
+         lsm_str_len(data_path) * sizeof(char));
+  sprintf(&idx_file_path[lsm_str_len(data_path)], "/%s", LSM_IDX_FILE_NAME);
+
+  FILE *idx_file = fopen(idx_file_path, "r+b");
+
+  if (idx_file == NULL) {
+    // Create the file first
+    idx_file = fopen(idx_file_path, "wb");
+
+    if (idx_file == NULL) {
+      fclose(db_file);
+
+      return lsm_error_failed_io;
+    }
+
+    // The database code expects the idx file to start with how many blocks it
+    // contains, so we write that here
+    uint64_t num = 0;
+
+    if (fwrite(&num, sizeof(uint64_t), 1, idx_file) == 0) {
+      fclose(db_file);
+      fclose(idx_file);
+
+      return lsm_error_failed_io;
+    }
+
+    fflush(idx_file);
+    fclose(idx_file);
+
+    // If opening it in extended read mode still fails now, there's a problem
+    idx_file = fopen(idx_file_path, "r+b");
+
+    if (idx_file == NULL) {
+      fclose(db_file);
+
+      return lsm_error_failed_io;
+    }
+  }
+
+  store->data_path = data_path;
+  store->db.f = db_file;
+  store->idx.f = idx_file;
+
+  LSM_RES2(lsm_store_load_db(store), {
+    fclose(db_file);
+    fclose(idx_file);
+  });
+
+  *ptr = store;
+
+  return lsm_error_ok;
+}
+
+/**
+ * Wrapper around fread that reports short reads as an error.
+ *
+ * @param out buffer to read into
+ * @param sum if not NULL, incremented by size * count on success
+ * @param f file to read from
+ * @param size size in bytes of a single element
+ * @param count number of elements to read
+ * @return lsm_error_failed_io if fewer than count elements could be read
+ */
+static lsm_error lsm_fread(void *out, uint64_t *sum, FILE *f, uint64_t size,
+                           uint64_t count) {
+  size_t res = fread(out, size, count, f);
+
+  if (res < count) {
+    return lsm_error_failed_io;
+  }
+
+  if (sum != NULL) {
+    *sum += size * count;
+  }
+
+  return lsm_error_ok;
+}
+
+/**
+ * Read a length-prefixed string: a raw uint64_t length, followed by that many
+ * bytes.
+ *
+ * @param out where to store the newly allocated string
+ * @param sum if not NULL, incremented by the number of bytes read
+ * @param f file to read from
+ * @return lsm_error_failed_io if the file ends or a read fails before the
+ * entire string was read; lsm_error_failed_alloc if an allocation fails
+ */
+static lsm_error lsm_entry_read_str(lsm_str **out, uint64_t *sum, FILE *f) {
+  uint64_t len;
+  LSM_RES(lsm_fread(&len, sum, f, sizeof(uint64_t), 1));
+
+  char *buf = malloc(len + 1);
+
+  if (buf == NULL) {
+    return lsm_error_failed_alloc;
+  }
+
+  buf[len] = '\0';
+
+  uint64_t read = 0;
+
+  while (read < len) {
+    size_t res = fread(&buf[read], 1, len - read, f);
+
+    // fread only returns 0 for a non-empty request on EOF or error. Without
+    // this check, a truncated file would make this loop spin forever.
+    if (res == 0) {
+      free(buf);
+
+      return lsm_error_failed_io;
+    }
+
+    read += res;
+  }
+
+  lsm_str *str;
+  lsm_error res = lsm_str_init_zero(&str);
+
+  if (res != lsm_error_ok) {
+    free(buf);
+
+    return res;
+  }
+
+  // Produce the same representation lsm_str_init would (short strings stored
+  // inline, longer ones taking ownership of the buffer), but use the length
+  // stored in the file. lsm_str_init measures its input with strlen, which cuts
+  // off values containing NUL bytes, e.g. integer attributes stored as raw
+  // bytes.
+  if (len <= sizeof(str->data.val)) {
+    memcpy(str->data.val, buf, len);
+    free(buf);
+  } else {
+    str->data.ptr = buf;
+  }
+
+  str->len = len;
+
+  if (sum != NULL) {
+    *sum += len;
+  }
+
+  *out = str;
+
+  return lsm_error_ok;
+}
+
+static lsm_error lsm_entry_read_attrs(uint64_t *sum,
lsm_entry_handle *handle, + FILE *db_file) { + uint8_t attr_count; + LSM_RES(lsm_fread(&attr_count, sum, db_file, sizeof(uint8_t), 1)); + + // attr_type, val_len + uint8_t attr_type; + lsm_str *val; + + for (uint64_t i = 0; i < attr_count; i++) { + LSM_RES(lsm_fread(&attr_type, sum, db_file, sizeof(uint8_t), 1)); + LSM_RES(lsm_entry_read_str(&val, sum, db_file)); + LSM_RES(lsm_entry_attr_insert(handle, attr_type, val)); + } + + return lsm_error_ok; +} +static lsm_error lsm_fseek(FILE *f, uint64_t pos) { + if (fseek(f, pos, SEEK_SET) != 0) { + return lsm_error_failed_io; + } + + return lsm_error_ok; +} + +/** + * Insert a new entry by reading it from the db file + */ +lsm_error lsm_store_insert_from_db(lsm_store *store, uint64_t pos, + uint64_t idx_file_offset) { + LSM_RES(lsm_fseek(store->db.f, pos)); + + lsm_str *key; + LSM_RES(lsm_entry_read_str(&key, &store->db.size, store->db.f)); + + lsm_entry_handle *handle; + LSM_RES(lsm_store_insert(&handle, store, key)); + + LSM_RES(lsm_fread(&handle->wrapper->entry->data_len, &store->db.size, + store->db.f, sizeof(uint64_t), 1)); + LSM_RES(lsm_entry_read_attrs(&store->db.size, handle, store->db.f)); + + handle->wrapper->entry->idx_file_offset = idx_file_offset; + + handle->states = 0; + lsm_entry_close(handle); + + return lsm_error_ok; +} + +lsm_error lsm_store_load_db(lsm_store *store) { + uint64_t db_dim[2]; + + rewind(store->idx.f); + + // idx file starts with block count + LSM_RES(lsm_fread(&store->idx.block_count, &store->idx.size, store->idx.f, + sizeof(uint64_t), 1)); + + for (uint64_t i = 0; i < store->idx.block_count; i++) { + uint64_t idx_file_offset = store->idx.size; + + LSM_RES(lsm_fread(&db_dim, &store->idx.size, store->idx.f, sizeof(uint64_t), + 2)); + + // We zero out the length of entries if they're no longer valid + if (db_dim[1] == 0) { + continue; + } + + LSM_RES(lsm_store_insert_from_db(store, db_dim[0], idx_file_offset)); + } + + return lsm_error_ok; +} diff --git 
a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c
new file mode 100644
index 0000000..31f907b
--- /dev/null
+++ b/lsm/src/store/lsm_store_disk_write.c
@@ -0,0 +1,172 @@
+#include "lsm/store_internal.h"
+
+/**
+ * Wrapper around fwrite that reports short writes as an error.
+ *
+ * @param sum if not NULL, incremented by size * count on success
+ * @param f file to write to
+ * @param size size in bytes of a single element
+ * @param count number of elements to write
+ * @param val pointer to the elements to write
+ * @return lsm_error_failed_io if fewer than count elements were written
+ */
+static lsm_error lsm_fwrite(uint64_t *sum, FILE *f, uint64_t size,
+                            uint64_t count, const void *val) {
+  size_t res = fwrite(val, size, count, f);
+
+  if (res < count) {
+    return lsm_error_failed_io;
+  }
+
+  if (sum != NULL) {
+    *sum += size * count;
+  }
+
+  return lsm_error_ok;
+}
+
+/**
+ * Write a length-prefixed string: the length as a raw uint64_t, followed by
+ * the string's bytes.
+ *
+ * @param sum if not NULL, incremented by the number of bytes written
+ * @param f file to write to
+ * @param s string to write
+ * @return lsm_error_failed_io if the string could not be written in full
+ */
+static lsm_error lsm_write_str(uint64_t *sum, FILE *f, const lsm_str *s) {
+  uint64_t len = lsm_str_len(s);
+
+  LSM_RES(lsm_fwrite(sum, f, sizeof(uint64_t), 1, &len));
+
+  const char *ptr = lsm_str_ptr(s);
+  uint64_t written = 0;
+
+  while (written < len) {
+    // Resume after the bytes already written instead of restarting at the
+    // beginning of the string after a short write
+    size_t res = fwrite(&ptr[written], sizeof(char), len - written, f);
+
+    // No progress means the write failed; bail out rather than loop forever
+    if (res == 0) {
+      return lsm_error_failed_io;
+    }
+
+    written += res;
+  }
+
+  if (sum != NULL) {
+    *sum += len * sizeof(char);
+  }
+
+  return lsm_error_ok;
+}
+
+/**
+ * Move the file's position to an absolute offset.
+ *
+ * @param f file to seek in
+ * @param pos offset from the start of the file
+ * @return lsm_error_failed_io if fseek fails
+ */
+static lsm_error lsm_fseek(FILE *f, uint64_t pos) {
+  if (fseek(f, pos, SEEK_SET) != 0) {
+    return lsm_error_failed_io;
+  }
+
+  return lsm_error_ok;
+}
+
+/**
+ * Serialize an entry to the db file at the given position. The written record
+ * consists of the key, the data length, the attribute count and, for each
+ * attribute, its type followed by its value.
+ *
+ * @param size set to the number of bytes written
+ * @param db_file file to write to
+ * @param entry entry to serialize
+ * @param pos offset in db_file to start writing at
+ */
+lsm_error lsm_write_db_entry(uint64_t *size, FILE *db_file, lsm_entry *entry,
+                             uint64_t pos) {
+  *size = 0;
+
+  LSM_RES(lsm_fseek(db_file, pos));
+
+  LSM_RES(lsm_write_str(size, db_file, entry->key));
+  LSM_RES(lsm_fwrite(size, db_file, sizeof(uint64_t), 1, &entry->data_len));
+  LSM_RES(lsm_fwrite(size, db_file, sizeof(uint8_t), 1, &entry->attrs.count));
+
+  for (uint8_t i = 0; i < entry->attrs.count; i++) {
+    LSM_RES(lsm_fwrite(size, db_file, sizeof(uint8_t), 1,
+                       &entry->attrs.items[i].type));
+    LSM_RES(lsm_write_str(size, db_file, entry->attrs.items[i].str));
+  }
+
+  return lsm_error_ok;
+}
+
+/**
+ * Write an index record, consisting of an offset and a length, to the idx file
+ * at the given position.
+ *
+ * @param size set to the number of bytes written
+ * @param idx_file file to write to
+ * @param offset offset value to store in the record
+ * @param len length value to store in the record
+ * @param pos offset in idx_file to start writing at
+ */
+lsm_error lsm_write_idx_entry(uint64_t *size, FILE *idx_file, uint64_t offset,
+                              uint64_t len, uint64_t pos) {
+  *size = 0;
+
+  LSM_RES(lsm_fseek(idx_file, pos));
+
+  LSM_RES(lsm_fwrite(size, idx_file, sizeof(uint64_t), 1, &offset));
+  LSM_RES(lsm_fwrite(size, idx_file, sizeof(uint64_t), 1, &len));
+
+  return lsm_error_ok;
+}
+
+lsm_error lsm_entry_disk_insert(lsm_entry_handle *handle) { + lsm_store *store = handle->store; + + pthread_mutex_lock(&store->db.lock); + + uint64_t db_entry_index = store->db.size; + + uint64_t db_entry_size; + lsm_error res = lsm_write_db_entry(&db_entry_size, store->db.f, + handle->wrapper->entry, store->db.size); + fflush(store->db.f); + + pthread_mutex_unlock(&store->db.lock); + + if (res != lsm_error_ok) { + return res; + } + + // Append entry to index file + pthread_mutex_lock(&store->idx.lock); + + uint64_t idx_entry_index = store->idx.size; + + uint64_t idx_entry_size; + res = lsm_write_idx_entry(&idx_entry_size, store->idx.f, db_entry_index, + db_entry_size, store->idx.size); + + if (res == lsm_error_ok) { + // Update the counter at the beginning of the file + rewind(store->idx.f); + + uint64_t new_block_count = store->idx.block_count + 1; + + res = lsm_fwrite(NULL, store->idx.f, sizeof(uint64_t), 1, &new_block_count); + + if (res == lsm_error_ok) { + // Only if we successfully updated the on-disk counter do we make the code + // aware that the files' sizes have increased. This way, if a write to the + // counter fails, the code will simply reuse the already written content. 
+ store->idx.size += idx_entry_size; + store->idx.block_count = new_block_count; + store->db.size += db_entry_size; + + handle->wrapper->entry->idx_file_offset = idx_entry_index; + } + } + + fflush(store->idx.f); + pthread_mutex_unlock(&store->idx.lock); + + return res; +} + +// Marking an entry as removed in the idx file is simply setting the length of +// its entry to zero +lsm_error lsm_entry_disk_remove(lsm_entry_handle *handle) { + lsm_store *store = handle->store; + const lsm_entry *entry = handle->wrapper->entry; + + pthread_mutex_lock(&store->idx.lock); + + lsm_error res = + lsm_fseek(store->idx.f, entry->idx_file_offset + sizeof(uint64_t)); + + if (res != lsm_error_ok) { + pthread_mutex_unlock(&store->idx.lock); + + return res; + } + + uint64_t val = 0; + res = lsm_fwrite(NULL, store->idx.f, sizeof(uint64_t), 1, &val); + + pthread_mutex_unlock(&store->idx.lock); + + if (res != lsm_error_ok) { + return res; + } + + fflush(store->idx.f); + + // Remove data file if present + if (entry->data_len > 0) { + if (handle->f != NULL) { + fclose(handle->f); + handle->f = NULL; + } + + char data_path[lsm_entry_data_path_len(handle) + 1]; + lsm_entry_data_path(data_path, handle); + + if (remove(data_path) != 0) { + return lsm_error_failed_io; + } + } + + return lsm_error_ok; +} diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c new file mode 100644 index 0000000..63d90a7 --- /dev/null +++ b/lsm/src/store/lsm_store_entry.c @@ -0,0 +1,318 @@ +#include +#include +#include +#include +#include +#include + +#include "lsm.h" +#include "lsm/store.h" +#include "lsm/store_internal.h" +#include "lsm/str.h" + +lsm_error lsm_entry_init(lsm_entry **ptr) { + lsm_entry *entry = calloc(1, sizeof(lsm_entry)); + + if (entry == NULL) { + return lsm_error_failed_alloc; + } + + *ptr = entry; + + return lsm_error_ok; +} + +void lsm_entry_free(lsm_entry *entry) { + if (entry->attrs.count > 0) { + free(entry->attrs.items); + } + + free(entry); +} + +lsm_error 
lsm_entry_wrapper_init(lsm_entry_wrapper **ptr) { + lsm_entry_wrapper *wrap = calloc(1, sizeof(lsm_entry_wrapper)); + + if (wrap == NULL) { + return lsm_error_failed_alloc; + } + + pthread_rwlock_init(&wrap->lock, NULL); + + *ptr = wrap; + + return lsm_error_ok; +} + +void lsm_entry_wrapper_free(lsm_entry_wrapper *wrapper) { free(wrapper); } + +lsm_error lsm_entry_handle_init(lsm_entry_handle **out) { + lsm_entry_handle *handle = calloc(1, sizeof(lsm_entry_handle)); + + if (handle == NULL) { + return lsm_error_failed_alloc; + } + + *out = handle; + + return lsm_error_ok; +} + +void lsm_entry_close(lsm_entry_handle *handle) { + if (handle->f != NULL) { + fclose(handle->f); + } + + // TODO handle errors here + if ((handle->states & lsm_entry_handle_state_new) && + !(handle->states & lsm_entry_handle_state_removed)) { + lsm_entry_disk_insert(handle); + } else if ((handle->states & lsm_entry_handle_state_removed) && + !(handle->states & lsm_entry_handle_state_new)) { + lsm_entry_disk_remove(handle); + + lsm_entry_free(handle->wrapper->entry); + handle->wrapper->entry = NULL; + } else if (handle->states & lsm_entry_handle_state_updated) { + /* lsm_entry_disk_update(handle); */ + } + + pthread_rwlock_unlock(&handle->wrapper->lock); + free(handle); +} + +bool lsm_entry_attr_present(lsm_entry_handle *handle, uint8_t type) { + return (handle->wrapper->entry->attrs.bitmap[type / 64] & + (((uint64_t)1) << (type % 64))) != 0; +} + +lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, + uint8_t type) { + if (!lsm_entry_attr_present(handle, type)) { + return lsm_error_not_found; + } + + lsm_entry *entry = handle->wrapper->entry; + uint64_t i = 0; + + while (entry->attrs.items[i].type != type) { + i++; + } + + *out = entry->attrs.items[i].str; + + return lsm_error_ok; +} + +lsm_error lsm_entry_attr_get_uint64_t(uint64_t *out, lsm_entry_handle *handle, + uint8_t type) { + lsm_str *s; + + LSM_RES(lsm_entry_attr_get(&s, handle, type)); + + uint64_t num = 0; + + for 
(uint8_t i = 0; i < sizeof(uint64_t) / sizeof(char); i++) {
+    ((char *)&num)[i] = lsm_str_char(s, i);
+  }
+
+  *out = num;
+
+  return lsm_error_ok;
+}
+
+/**
+ * Retrieve an attribute and return the first byte of its value.
+ *
+ * @param out where to store the byte
+ * @param handle handle to the entry
+ * @param type type of the attribute to look up
+ * @return the error of lsm_entry_attr_get if the lookup fails
+ */
+lsm_error lsm_entry_attr_get_uint8_t(uint8_t *out, lsm_entry_handle *handle,
+                                     uint8_t type) {
+  lsm_str *s;
+
+  LSM_RES(lsm_entry_attr_get(&s, handle, type));
+
+  *out = lsm_str_char(s, 0);
+
+  return lsm_error_ok;
+}
+
+/**
+ * Remove an attribute from the entry behind the handle.
+ *
+ * @param out where to store the removed attribute's value; may be NULL if the
+ * caller doesn't need it
+ * @param handle handle to the entry
+ * @param type type of the attribute to remove
+ * @return lsm_error_not_found if the entry has no attribute of this type;
+ * lsm_error_failed_alloc if the shrunken array couldn't be allocated
+ */
+lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle,
+                                uint8_t type) {
+  if (!lsm_entry_attr_present(handle, type)) {
+    return lsm_error_not_found;
+  }
+
+  lsm_entry *entry = handle->wrapper->entry;
+
+  // The bitmap check above implies the attribute is somewhere in the array
+  uint64_t i = 0;
+
+  while (entry->attrs.items[i].type != type) {
+    i++;
+  }
+
+  lsm_attr *new_attrs = NULL;
+
+  // Removing the only remaining attribute leaves no array to allocate
+  if (entry->attrs.count > 1) {
+    new_attrs = malloc((entry->attrs.count - 1) * sizeof(lsm_attr));
+
+    if (new_attrs == NULL) {
+      return lsm_error_failed_alloc;
+    }
+
+    memcpy(new_attrs, entry->attrs.items, i * sizeof(lsm_attr));
+    memcpy(&new_attrs[i], &entry->attrs.items[i + 1],
+           (entry->attrs.count - i - 1) * sizeof(lsm_attr));
+  }
+
+  // out is optional on every path, including the single-attribute one
+  if (out != NULL) {
+    *out = entry->attrs.items[i].str;
+  }
+
+  free(entry->attrs.items);
+
+  entry->attrs.items = new_attrs;
+  entry->attrs.count--;
+  entry->attrs.bitmap[type / 64] &= ~(((uint64_t)1) << (type % 64));
+
+  // Every successful removal changes the entry, so always mark it as updated
+  handle->states |= lsm_entry_handle_state_updated;
+
+  return lsm_error_ok;
+}
+
+lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint8_t type,
+                                lsm_str *data) {
+  if (lsm_entry_attr_present(handle, type)) {
+    return lsm_error_already_present;
+  }
+
+  lsm_entry *entry = handle->wrapper->entry;
+
+  lsm_attr *new_attrs =
+      realloc(entry->attrs.items, (entry->attrs.count + 1) * sizeof(lsm_attr));
+
+  if (new_attrs == NULL) {
+    return lsm_error_failed_alloc;
+  }
+
+  new_attrs[entry->attrs.count].type = type;
+
new_attrs[entry->attrs.count].str = data; + + entry->attrs.items = new_attrs; + entry->attrs.count++; + entry->attrs.bitmap[type / 64] |= ((uint64_t)1) << (type % 64); + + handle->states |= lsm_entry_handle_state_updated; + + return lsm_error_ok; +} + +lsm_error lsm_entry_attr_insert_uint64_t(lsm_entry_handle *handle, uint8_t type, + uint64_t data) { + lsm_str *s; + LSM_RES( + lsm_str_init_copy_n(&s, (char *)&data, sizeof(uint64_t) / sizeof(char))); + + return lsm_entry_attr_insert(handle, type, s); +} + +lsm_error lsm_entry_attr_insert_uint8_t(lsm_entry_handle *handle, uint8_t type, + uint8_t data) { + lsm_str *s; + LSM_RES( + lsm_str_init_copy_n(&s, (char *)&data, sizeof(uint8_t) / sizeof(char))); + + return lsm_entry_attr_insert(handle, type, s); +} + +uint64_t lsm_entry_data_len(lsm_entry_handle *handle) { + return handle->wrapper->entry->data_len; +} + +uint64_t lsm_entry_data_path_len(const lsm_entry_handle *handle) { + const lsm_str *data_path = handle->store->data_path; + const lsm_str *key = handle->wrapper->entry->key; + + uint8_t levels = + key->len <= LSM_STORE_DATA_LEVELS ? key->len : LSM_STORE_DATA_LEVELS; + + return data_path->len + 1 + 2 * levels + key->len + + strlen(LSM_DATA_FILE_SUFFIX); +} + +void lsm_entry_data_path(char *path, const lsm_entry_handle *handle) { + const lsm_str *data_path = handle->store->data_path; + const lsm_str *key = handle->wrapper->entry->key; + + uint8_t levels = + key->len > LSM_STORE_DATA_LEVELS ? 
LSM_STORE_DATA_LEVELS : key->len; + + memcpy(path, lsm_str_ptr(data_path), data_path->len); + path[data_path->len] = '/'; + + uint64_t index = data_path->len + 1; + + // Create each directory in the file hierarchy + // cppcheck-suppress knownConditionTrueFalse + for (uint8_t i = 0; i < levels; i++) { + path[index] = lsm_str_char(key, i); + path[index + 1] = '/'; + + index += 2; + } + + memcpy(&path[index], lsm_str_ptr(key), lsm_str_len(key)); + + index += lsm_str_len(key); + strcpy(&path[index], LSM_DATA_FILE_SUFFIX); +} + +lsm_error lsm_entry_data_open_write(lsm_entry_handle *handle) { + char path[lsm_entry_data_path_len(handle) + 1]; + lsm_entry_data_path(path, handle); + + const lsm_str *data_path = handle->store->data_path; + const lsm_str *key = handle->wrapper->entry->key; + + uint8_t levels = + key->len <= LSM_STORE_DATA_LEVELS ? key->len : LSM_STORE_DATA_LEVELS; + + // Create all required directories in the path + // cppcheck-suppress knownConditionTrueFalse + for (uint8_t i = 0; i < levels; i++) { + path[data_path->len + 2 * (i + 1)] = '\0'; + + if ((mkdir(path, 0755) != 0) && (errno != EEXIST)) { + return lsm_error_failed_io; + } + + path[data_path->len + 2 * (i + 1)] = '/'; + } + + FILE *f = fopen(path, "ab"); + + if (f == NULL) { + return lsm_error_failed_io; + } + + handle->f = f; + + return lsm_error_ok; +} + +lsm_error lsm_entry_data_open_read(lsm_entry_handle *handle) { + char path[lsm_entry_data_path_len(handle) + 1]; + lsm_entry_data_path(path, handle); + + FILE *f = fopen(path, "rb"); + + if (f == NULL) { + return lsm_error_failed_io; + } + + handle->f = f; + + return lsm_error_ok; +} diff --git a/lsm/src/str/lsm_str.c b/lsm/src/str/lsm_str.c new file mode 100644 index 0000000..217bad7 --- /dev/null +++ b/lsm/src/str/lsm_str.c @@ -0,0 +1,253 @@ +#include +#include +#include + +#include "lsm.h" +#include "lsm/str_internal.h" + +#define MIN(x, y) (((x) < (y)) ? 
(x) : (y))
+
+/**
+ * Allocate a new lsm_str that takes over the NUL-terminated string `s`.
+ *
+ * See lsm_str_overwrite for what happens to `s`.
+ *
+ * @param ptr where to store the pointer to the new string
+ * @param s string to take over
+ * @return lsm_error_failed_alloc if the struct cannot be allocated,
+ * lsm_error_ok otherwise
+ */
+lsm_error lsm_str_init(lsm_str **ptr, char *s) {
+  lsm_str *str = calloc(1, sizeof(lsm_str));
+
+  if (str == NULL) {
+    return lsm_error_failed_alloc;
+  }
+
+  lsm_str_overwrite(str, s);
+
+  *ptr = str;
+
+  return lsm_error_ok;
+}
+
+/**
+ * Allocate a new, zero-initialized (empty) lsm_str.
+ *
+ * @param ptr where to store the pointer to the new string
+ * @return lsm_error_failed_alloc if the struct cannot be allocated,
+ * lsm_error_ok otherwise
+ */
+lsm_error lsm_str_init_zero(lsm_str **ptr) {
+  lsm_str *str = calloc(1, sizeof(lsm_str));
+
+  if (str == NULL) {
+    return lsm_error_failed_alloc;
+  }
+
+  *ptr = str;
+
+  return lsm_error_ok;
+}
+
+/**
+ * Allocate a new lsm_str holding a copy of the NUL-terminated string `s`.
+ *
+ * @param ptr where to store the pointer to the new string; only written on
+ * success
+ * @param s string to copy
+ * @return lsm_error_failed_alloc if either the struct or the copy of the
+ * contents cannot be allocated, lsm_error_ok otherwise
+ */
+lsm_error lsm_str_init_copy(lsm_str **ptr, const char *s) {
+  lsm_str *str = calloc(1, sizeof(lsm_str));
+
+  if (str == NULL) {
+    return lsm_error_failed_alloc;
+  }
+
+  lsm_error res = lsm_str_overwrite_copy(str, s);
+
+  // Never hand out a string whose contents could not be copied
+  if (res != lsm_error_ok) {
+    free(str);
+
+    return res;
+  }
+
+  *ptr = str;
+
+  return lsm_error_ok;
+}
+
+/**
+ * Allocate a new lsm_str holding a copy of the first `len` bytes of `s`.
+ *
+ * @param ptr where to store the pointer to the new string; only written on
+ * success
+ * @param s buffer to copy from; does not need to be NUL-terminated
+ * @param len number of bytes to copy
+ * @return lsm_error_failed_alloc if either the struct or the copy of the
+ * contents cannot be allocated, lsm_error_ok otherwise
+ */
+lsm_error lsm_str_init_copy_n(lsm_str **ptr, const char *s, uint64_t len) {
+  lsm_str *str = calloc(1, sizeof(lsm_str));
+
+  if (str == NULL) {
+    return lsm_error_failed_alloc;
+  }
+
+  lsm_error res = lsm_str_overwrite_copy_n(str, s, len);
+
+  // Never hand out a string whose contents could not be copied
+  if (res != lsm_error_ok) {
+    free(str);
+
+    return res;
+  }
+
+  *ptr = str;
+
+  return lsm_error_ok;
+}
+
+/**
+ * Make `str` take over the NUL-terminated string `s`, without releasing
+ * whatever `str` held before.
+ *
+ * Strings of at most 8 bytes are copied into the inline buffer and `s` is
+ * freed; longer strings are referenced directly. Either way the caller must
+ * not use or free `s` afterwards, and `s` must have been obtained from malloc.
+ */
+void lsm_str_overwrite(lsm_str *str, char *s) {
+  str->len = strlen(s);
+
+  if (str->len <= 8) {
+    memcpy(str->data.val, s, str->len);
+    free(s);
+  } else {
+    str->data.ptr = s;
+  }
+}
+
+/**
+ * Overwrite `str` with a copy of the NUL-terminated string `s`, without
+ * releasing whatever `str` held before.
+ *
+ * @return the result of lsm_str_overwrite_copy_n
+ */
+lsm_error lsm_str_overwrite_copy(lsm_str *str, const char *s) {
+  return lsm_str_overwrite_copy_n(str, s, strlen(s));
+}
+
+/**
+ * Overwrite `str` with a copy of the first `len` bytes of `s`, without
+ * releasing whatever `str` held before.
+ *
+ * No terminating NUL byte is stored: exactly `len` bytes are copied, either
+ * into the inline buffer (len <= 8) or into a new heap buffer.
+ *
+ * @return lsm_error_failed_alloc if the heap buffer cannot be allocated, in
+ * which case `str` is left untouched; lsm_error_ok otherwise
+ */
+lsm_error lsm_str_overwrite_copy_n(lsm_str *str, const char *s, uint64_t len) {
+  if (len <= 8) {
+    memcpy(str->data.val, s, len);
+  } else {
+    char *buf = malloc(len * sizeof(char));
+
+    if (buf == NULL) {
+      return lsm_error_failed_alloc;
+    }
+
+    memcpy(buf, s, len);
+    str->data.ptr = buf;
+  }
+
+  str->len = len;
+
+  return lsm_error_ok;
+}
+
+/**
+ * Release the heap buffer of `str`, if it has one, and reset its length to 0.
+ * The struct itself is not freed.
+ */
+void lsm_str_zero(lsm_str *str) {
+  // Only strings longer than 8 bytes own a heap buffer
+  if (str->len > 8) {
+    free(str->data.ptr);
+  }
+
+  str->len = 0;
+}
+
+/**
+ * Release the contents of `str` and the struct itself.
+ */
+void lsm_str_free(lsm_str *str) {
+  lsm_str_zero(str);
+  free(str);
+}
+
+/**
+ * Return the length of the string in bytes.
+ */
+uint64_t lsm_str_len(const lsm_str *str) { return str->len; }
+
+/**
+ * Return a pointer to the first byte of the string: the inline buffer for
+ * strings of at most 8 bytes, the heap buffer otherwise. The bytes are not
+ * NUL-terminated.
+ */
+const char *lsm_str_ptr(const lsm_str *str) {
+  if (str->len <= 8) {
+    return str->data.val;
+  } else {
+    return str->data.ptr;
+  }
+}
+
+/**
+ * Return the byte at `index`. The index is not bounds-checked.
+ */
+char lsm_str_char(const lsm_str *str, uint64_t index) {
+  if (str->len <= 8) {
+    return str->data.val[index];
+  } else {
+    return str->data.ptr[index];
+  }
+}
+
+/**
+ * Store a copy of the bytes [start, end) of `str` in `out`.
+ *
+ * `end` is clamped to the length of `str`. A range that starts at or past the
+ * (clamped) end yields an empty string. Whatever `out` held before is
+ * overwritten without being released, so `out` may be uninitialized.
+ *
+ * @return lsm_error_failed_alloc if a heap buffer is needed and cannot be
+ * allocated, in which case `out` is left untouched; lsm_error_ok otherwise
+ */
+lsm_error lsm_str_substr(lsm_str *out, const lsm_str *str, uint64_t start,
+                         uint64_t end) {
+  // Never read past the end of the source string
+  if (end > str->len) {
+    end = str->len;
+  }
+
+  // A substring that starts past the string's length, or whose range is empty
+  // or reversed, will have length 0. Comparing before subtracting keeps the
+  // unsigned subtraction from wrapping around.
+  uint64_t len = start < end ? end - start : 0;
+  const char *str_ptr = lsm_str_ptr(str);
+
+  if (len <= 8) {
+    /* lsm_str_zero(out); */
+    // Skip the copy for empty results, so no pointer past the end of the
+    // source buffer is ever formed
+    if (len > 0) {
+      memcpy(out->data.val, &str_ptr[start], len);
+    }
+  } else {
+    char *buf = malloc(len * sizeof(char));
+
+    if (buf == NULL) {
+      return lsm_error_failed_alloc;
+    }
+
+    memcpy(buf, &str_ptr[start], len);
+
+    /* lsm_str_zero(out); */
+    out->data.ptr = buf;
+  }
+
+  out->len = len;
+
+  return lsm_error_ok;
+}
+
+/**
+ * Return the length of the common prefix of `s1` starting at `s1_offset` and
+ * `s2` starting at `s2_offset`.
+ *
+ * An offset at or past the end of its string yields 0.
+ */
+uint64_t lsm_str_cmp(const lsm_str *s1, uint64_t s1_offset, const lsm_str *s2,
+                     uint64_t s2_offset) {
+  // Nothing to compare; this also keeps the unsigned subtractions below from
+  // wrapping around when an offset exceeds the length
+  if ((s1_offset >= s1->len) || (s2_offset >= s2->len)) {
+    return 0;
+  }
+
+  uint64_t index = 0;
+  uint64_t max_len = MIN(s1->len - s1_offset, s2->len - s2_offset);
+
+  while ((index < max_len) && (lsm_str_char(s1, s1_offset + index) ==
+                               lsm_str_char(s2, s2_offset + index))) {
+    index++;
+  }
+
+  return index;
+}
+
+/**
+ * Shorten `s` to `new_len` bytes. A `new_len` that is not smaller than the
+ * current length leaves the string untouched.
+ *
+ * @return lsm_error_failed_alloc if a smaller heap buffer cannot be
+ * allocated, in which case `s` is left untouched; lsm_error_ok otherwise
+ */
+lsm_error lsm_str_truncate(lsm_str *s, uint64_t new_len) {
+  if (new_len >= s->len) {
+    return lsm_error_ok;
+  }
+
+  if (new_len <= 8) {
+    // Heap to inline: the inline buffer shares its storage with the pointer,
+    // so the pointer has to be saved before the bytes are copied over it. An
+    // inline string already has its first new_len bytes in place.
+    if (s->len > 8) {
+      char *heap_buf = s->data.ptr;
+
+      memcpy(s->data.val, heap_buf, new_len);
+      free(heap_buf);
+    }
+  } else {
+    char *buf = malloc(new_len * sizeof(char));
+
+    if (buf == NULL) {
+      return lsm_error_failed_alloc;
+    }
+
+    memcpy(buf, s->data.ptr, new_len);
+    free(s->data.ptr);
+
+    s->data.ptr = buf;
+  }
+
+  s->len = new_len;
+
+  return lsm_error_ok;
+}
+
+/**
+ * Split `s` at `index`: `s2` receives a copy of the bytes from `index` to the
+ * end, after which `s` is truncated to its first `index` bytes.
+ *
+ * Whatever `s2` held before is overwritten without being released.
+ *
+ * @return the first error returned by lsm_str_substr or lsm_str_truncate,
+ * lsm_error_ok otherwise
+ */
+lsm_error lsm_str_split(lsm_str *s, lsm_str *s2, uint64_t index) {
+  lsm_error res = lsm_str_substr(s2, s, index, s->len);
+
+  if (res != lsm_error_ok) {
+    return res;
+  }
+
+  return lsm_str_truncate(s, index);
+}
+
+/**
+ * Return whether both strings have the same length and the same bytes.
+ */
+bool lsm_str_eq(const lsm_str *s1, const lsm_str *s2) {
+  if (s1->len != s2->len) {
+    return false;
+  }
+
+  return memcmp(lsm_str_ptr(s1),
lsm_str_ptr(s2), s1->len) == 0;
+}
+
+/**
+ * Append the bytes of `s2` to `s`. `s2` is not modified.
+ *
+ * The result stays in the inline buffer while it is at most 8 bytes long;
+ * otherwise it lives in a heap buffer that is newly allocated (when `s` was
+ * inline) or grown with realloc (when `s` was already on the heap).
+ *
+ * @return lsm_error_failed_alloc if the heap buffer cannot be allocated or
+ * grown, lsm_error_ok otherwise
+ */
+lsm_error lsm_str_append(lsm_str *s, lsm_str *s2) {
+  if (s2->len == 0) {
+    return lsm_error_ok;
+  }
+
+  uint64_t new_len = s->len + s2->len;
+
+  if (new_len <= 8) {
+    // Both strings are inline here, as neither can be longer than new_len
+    memcpy(&s->data.val[s->len], s2->data.val, s2->len);
+  } else {
+    char *buf;
+
+    if (s->len <= 8) {
+      buf = malloc(new_len * sizeof(char));
+
+      if (buf == NULL) {
+        return lsm_error_failed_alloc;
+      }
+
+      // Move the inline bytes over before the pointer overwrites them
+      memcpy(buf, s->data.val, s->len);
+    } else {
+      buf = realloc(s->data.ptr, new_len * sizeof(char));
+
+      if (buf == NULL) {
+        return lsm_error_failed_alloc;
+      }
+    }
+
+    memcpy(&buf[s->len], lsm_str_ptr(s2), s2->len);
+    s->data.ptr = buf;
+  }
+
+  s->len += s2->len;
+
+  return lsm_error_ok;
+}
diff --git a/lsm/src/trie/lsm_trie.c b/lsm/src/trie/lsm_trie.c
new file mode 100644
index 0000000..0df8515
--- /dev/null
+++ b/lsm/src/trie/lsm_trie.c
@@ -0,0 +1,263 @@
+#include
+
+#include "lsm.h"
+#include "lsm/trie_internal.h"
+
+/**
+ * Allocate a new, zero-initialized trie node.
+ *
+ * @param ptr where to store the pointer to the new node
+ * @return lsm_error_failed_alloc if the node cannot be allocated,
+ * lsm_error_ok otherwise
+ */
+lsm_error lsm_trie_node_init(lsm_trie_node **ptr) {
+  lsm_trie_node *node = calloc(1, sizeof(lsm_trie_node));
+
+  if (node == NULL) {
+    return lsm_error_failed_alloc;
+  }
+
+  *ptr = node;
+
+  return lsm_error_ok;
+}
+
+/**
+ * Clear the node's binary tree, release its skip string and free the node.
+ *
+ * NOTE(review): whether lsm_bt_clear also releases the child nodes stored in
+ * the tree is not visible here; node->data is not touched by this function.
+ */
+void lsm_trie_node_free(lsm_trie_node *node) {
+  lsm_bt_clear(&node->bt);
+  lsm_str_zero(&node->skip);
+  free(node);
+}
+
+/**
+ * Allocate a new trie together with its root node.
+ *
+ * @param ptr where to store the pointer to the new trie
+ * @return lsm_error_failed_alloc if the trie cannot be allocated, the error
+ * of lsm_trie_node_init if the root cannot be created, lsm_error_ok otherwise
+ */
+lsm_error lsm_trie_init(lsm_trie **ptr) {
+  lsm_trie *trie = calloc(1, sizeof(lsm_trie));
+
+  if (trie == NULL) {
+    return lsm_error_failed_alloc;
+  }
+
+  lsm_trie_node *root;
+  lsm_error res = lsm_trie_node_init(&root);
+
+  if (res != lsm_error_ok) {
+    free(trie);
+
+    return res;
+  }
+
+  trie->root = root;
+  *ptr = trie;
+
+  return lsm_error_ok;
+}
+
+/**
+ * Return the size field of the trie, which the insert and remove functions
+ * increment and decrement once per stored key.
+ */
+uint64_t lsm_trie_size(const lsm_trie *trie) { return trie->size; }
+
+lsm_error lsm_trie_insert(lsm_trie *trie, const lsm_str *key, void *data) {
+  // NULL is not allowed as a data value, as it's used to indicate a lack of
+  // data
+  if (data == NULL) {
+    return lsm_error_null_value;
+  }
+
+  uint64_t key_len = lsm_str_len(key);
+
+  // Empty string is represented
by the root
+  if (key_len == 0) {
+    if (trie->root->data == NULL) {
+      trie->root->data = data;
+      trie->size++;
+
+      return lsm_error_ok;
+    } else {
+      return lsm_error_already_present;
+    }
+  }
+
+  uint64_t index = 0;
+  lsm_trie_node *node = trie->root;
+  lsm_trie_node *next_node;
+
+  // Walk down the trie, consuming one edge character plus the child's skip
+  // per iteration
+  while (index < key_len) {
+    char c = lsm_str_char(key, index);
+    lsm_error res = lsm_bt_search((void **)&next_node, &node->bt, c);
+
+    // No child is present yet for this character, so we can insert the string
+    // here
+    if (res == lsm_error_not_found) {
+      lsm_trie_node *new_node;
+      LSM_RES(lsm_trie_node_init(&new_node));
+
+      new_node->data = data;
+
+      // The remainder of the key after the edge character becomes the skip.
+      // If it cannot be copied, the node must not be linked into the trie, as
+      // it would then represent a shorter key than the one being inserted.
+      res = lsm_str_substr(&new_node->skip, key, index + 1, key_len);
+
+      if (res == lsm_error_ok) {
+        res = lsm_bt_insert(&node->bt, c, new_node);
+      }
+
+      if (res != lsm_error_ok) {
+        lsm_trie_node_free(new_node);
+
+        return res;
+      }
+
+      trie->size++;
+
+      return lsm_error_ok;
+    }
+
+    index++;
+
+    // We compare the remaining part of the key with the node's skip. If cmp is
+    // less than the length of the skip, we know they differ and the edge should
+    // be split.
+    uint64_t cmp = lsm_str_cmp(key, index, &next_node->skip, 0);
+
+    if (cmp < lsm_str_len(&next_node->skip)) {
+      lsm_trie_node *split_node;
+      LSM_RES(lsm_trie_node_init(&split_node));
+
+      // NOTE(review): the results of lsm_bt_replace, lsm_bt_insert,
+      // lsm_str_substr and lsm_str_truncate below are not checked, so a
+      // failure part-way through is not reported to the caller
+
+      // split_node replaces the original node as the new child node
+      // bottom_node here is always the same value as next_node
+      lsm_trie_node *bottom_node;
+      lsm_bt_replace((void **)&bottom_node, &node->bt, c, split_node);
+
+      // The old next node now becomes the child of split_node, reachable
+      // through the first skip character that did not match the key
+      lsm_bt_insert(&split_node->bt, lsm_str_char(&next_node->skip, cmp),
+                    next_node);
+
+      // split_node's skip has not been initialized yet, so we can simply
+      // overwrite it with bottom_node's skip. This is a shallow copy: until
+      // the next statement, both nodes refer to the same skip contents.
+      split_node->skip = bottom_node->skip;
+
+      // The split divides the edge into two parts. The old node gets a fresh
+      // copy of the second part: everything after the character that now
+      // serves as its edge character in split_node's tree.
+      lsm_str_substr(&next_node->skip, &split_node->skip, cmp + 1,
+                     lsm_str_len(&split_node->skip));
+
+      // The split node keeps the first part of the skip: the cmp characters
+      // that matched the key
+      lsm_str_truncate(&split_node->skip, cmp);
+
+      next_node = split_node;
+    }
+
+    node = next_node;
+    index += cmp;
+  }
+
+  // This catches the edge case where the exact node for the string is already
+  // present in the trie
+  if (node->data != NULL) {
+    return lsm_error_already_present;
+  }
+
+  node->data = data;
+  trie->size++;
+
+  return lsm_error_ok;
+}
+
+/**
+ * Look up `key` in the trie.
+ *
+ * @param out if not NULL, receives the data stored for the key
+ * @param trie trie to search
+ * @param key key to look for; the empty key is stored in the root node
+ */
+lsm_error lsm_trie_search(void **out, const lsm_trie *trie,
+                          const lsm_str *key) {
+  uint64_t key_len = lsm_str_len(key);
+
+  if (key_len == 0) {
+    if (trie->root->data != NULL) {
+      if (out != NULL) {
+        *out = trie->root->data;
+      }
+
+      return lsm_error_ok;
+    } else {
+      return lsm_error_not_found;
+    }
+  }
+
+  uint64_t index = 0;
+  lsm_trie_node *node = trie->root;
+  lsm_trie_node *next_node;
+
+  while (index < key_len) {
+    char c = lsm_str_char(key, index);
+    lsm_error res = lsm_bt_search((void **)&next_node, &node->bt, c);
+
+    // Any failure to find the child, not only lsm_error_not_found, is passed
+    // on to the caller unchanged
+    if (res != lsm_error_ok) {
+      return res;
+    }
+
+    index++;
+
+    uint64_t cmp =
lsm_str_cmp(key, index, &next_node->skip, 0);
+
+    // If we end in the middle of an edge, we definitely haven't found the node
+    if (cmp != lsm_str_len(&next_node->skip)) {
+      return lsm_error_not_found;
+    }
+
+    node = next_node;
+    index += cmp;
+  }
+
+  // The node exists, but may only be an inner node without a value
+  if (node->data == NULL) {
+    return lsm_error_not_found;
+  }
+
+  if (out != NULL) {
+    *out = node->data;
+  }
+
+  return lsm_error_ok;
+}
+
+/**
+ * Remove `key` from the trie by clearing the data pointer of its node. The
+ * node itself is left in place.
+ *
+ * @param out if not NULL, receives the data that was stored for the key
+ * @param trie trie to remove the key from
+ * @param key key to remove; the empty key is stored in the root node
+ * @return lsm_error_not_found if the key has no data in the trie, the error
+ * of lsm_bt_search if a child lookup fails, lsm_error_ok otherwise
+ */
+lsm_error lsm_trie_remove(void **out, lsm_trie *trie, const lsm_str *key) {
+  uint64_t key_len = lsm_str_len(key);
+
+  if (key_len == 0) {
+    if (trie->root->data != NULL) {
+      if (out != NULL) {
+        *out = trie->root->data;
+      }
+
+      trie->root->data = NULL;
+      trie->size--;
+
+      return lsm_error_ok;
+    } else {
+      return lsm_error_not_found;
+    }
+  }
+
+  uint64_t index = 0;
+  lsm_trie_node *parent = trie->root;
+  lsm_trie_node *child;
+
+  // key_len is at least 1 here, so the loop body runs at least once and
+  // `child` is always assigned before it is used below
+  while (index < key_len) {
+    char c = lsm_str_char(key, index);
+    lsm_error res = lsm_bt_search((void **)&child, &parent->bt, c);
+
+    if (res != lsm_error_ok) {
+      return res;
+    }
+
+    index++;
+
+    uint64_t cmp = lsm_str_cmp(key, index, &child->skip, 0);
+
+    // If we end in the middle of an edge, we definitely haven't found the node
+    if (cmp != lsm_str_len(&child->skip)) {
+      return lsm_error_not_found;
+    }
+
+    index += cmp;
+
+    // This context is needed for the removal
+    // NOTE(review): `parent` is not read again after the loop in this
+    // function
+    if (index < key_len) {
+      parent = child;
+    }
+  }
+
+  if (child->data == NULL) {
+    return lsm_error_not_found;
+  }
+
+  if (out != NULL) {
+    *out = child->data;
+  }
+
+  child->data = NULL;
+
+  trie->size--;
+
+  return lsm_error_ok;
+}
diff --git a/lsm/test/bt/bt.c b/lsm/test/bt/bt.c
new file mode 100644
index 0000000..fdff839
--- /dev/null
+++ b/lsm/test/bt/bt.c
@@ -0,0 +1,115 @@
+#include "test.h"
+#include "lsm/bt_internal.h"
+
+// Declares a local `bt` and checks that it was initialized successfully
+#define BT_INIT() \
+  lsm_bt *bt; \
+  TEST_CHECK(lsm_bt_init(&bt) == lsm_error_ok); \
+  TEST_CHECK(bt != NULL)
+
+// A tree can be created and freed again
+void test_init() {
+  BT_INIT();
+  lsm_bt_free(bt);
+}
+
+// A single key can be inserted once and found again; other keys are not found
+void test_insert_first() {
+  BT_INIT();
+
+  TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) ==
lsm_error_ok);
+  TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_already_present);
+
+  void *data;
+  TEST_CHECK(lsm_bt_search(&data, bt, 'a') == lsm_error_ok);
+  TEST_CHECK(data == (void *)1);
+
+  TEST_CHECK(lsm_bt_search(&data, bt, 'b') == lsm_error_not_found);
+
+  lsm_bt_free(bt);
+}
+
+// Two distinct keys can each be inserted once and found again
+void test_insert_two() {
+  BT_INIT();
+
+  TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_ok);
+  TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_already_present);
+  TEST_CHECK(lsm_bt_insert(bt, 'b', (void *)2) == lsm_error_ok);
+  TEST_CHECK(lsm_bt_insert(bt, 'b', (void *)2) == lsm_error_already_present);
+
+  void *data;
+  TEST_CHECK(lsm_bt_search(&data, bt, 'a') == lsm_error_ok);
+  TEST_CHECK(data == (void *)1);
+  TEST_CHECK(lsm_bt_search(&data, bt, 'b') == lsm_error_ok);
+  TEST_CHECK(data == (void *)2);
+  TEST_CHECK(lsm_bt_search(&data, bt, 'c') == lsm_error_not_found);
+
+  lsm_bt_free(bt);
+}
+
+// Several keys inserted in non-sorted order can all be found again
+void test_insert_multiple() {
+  char chars[] = "falcoep";
+  // sizeof includes the terminating NUL byte, so '\0' is used as a key too
+  size_t char_count = sizeof(chars) / sizeof(char);
+
+  BT_INIT();
+
+  // The stored value is the 1-based position of the character in `chars`
+  for (size_t i = 0; i < char_count; i++) {
+    TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_ok);
+  }
+
+  TEST_CHECK(lsm_bt_size(bt) == char_count);
+
+  void *data;
+  for (size_t i = 0; i < char_count; i++) {
+    TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_already_present);
+    TEST_CHECK(lsm_bt_search(&data, bt, chars[i]) == lsm_error_ok);
+    TEST_CHECK(data == (void *)(i + 1));
+  }
+
+  lsm_bt_free(bt);
+}
+
+// Removing the only key returns its data and leaves an empty tree
+void test_remove_root() {
+  BT_INIT();
+
+  TEST_CHECK(lsm_bt_insert(bt, 'a', (void *)1) == lsm_error_ok);
+  TEST_CHECK(lsm_bt_size(bt) == 1);
+
+  void *data;
+  TEST_CHECK(lsm_bt_remove(&data, bt, 'a') == lsm_error_ok);
+  TEST_CHECK(data == (void *)1);
+  TEST_CHECK(bt->root == NULL);
+  TEST_CHECK(lsm_bt_size(bt) == 0);
+
+  lsm_bt_free(bt);
+}
+
+// Keys can be removed exactly once from a tree holding several keys
+void test_remove_multiple() {
+  char chars[] = "falcoep";
+  // sizeof includes the terminating NUL byte, so '\0' is used as a key too
+  size_t char_count = sizeof(chars) / sizeof(char);
+
+  BT_INIT();
+
+  for (size_t i =
0; i < char_count; i++) { + TEST_CHECK(lsm_bt_insert(bt, chars[i], (void *)(i + 1)) == lsm_error_ok); + } + + void *data; + TEST_CHECK(lsm_bt_remove(&data, bt, 'l') == lsm_error_ok); + TEST_CHECK(data == (void *)3); + TEST_CHECK(lsm_bt_remove(&data, bt, 'l') == lsm_error_not_found); + TEST_CHECK(lsm_bt_remove(&data, bt, 'e') == lsm_error_ok); + TEST_CHECK(data == (void *)6); + TEST_CHECK(lsm_bt_remove(&data, bt, 'e') == lsm_error_not_found); + + TEST_CHECK(lsm_bt_size(bt) == char_count - 2); + + lsm_bt_free(bt); +} + +TEST_LIST = { + { "bt init", test_init }, + { "bt insert first", test_insert_first }, + { "bt insert two", test_insert_two }, + { "bt insert multiple", test_insert_multiple }, + { "bt remove root", test_remove_root }, + { "bt remove multiple", test_remove_multiple }, + { NULL, NULL } +}; diff --git a/lsm/test/str/str.c b/lsm/test/str/str.c new file mode 100644 index 0000000..4cbd2d1 --- /dev/null +++ b/lsm/test/str/str.c @@ -0,0 +1,91 @@ +#include "test.h" +#include "lsm/str_internal.h" + +void test_cmp() { + lsm_str s1, s2, s3; + lsm_str_overwrite_copy(&s1, "some_string"); + lsm_str_overwrite_copy(&s2, "some"); + lsm_str_overwrite_copy(&s3, "some_string_extra"); + + TEST_CHECK(lsm_str_cmp(&s1, 0, &s2, 0) == 4); + TEST_CHECK(lsm_str_cmp(&s1, 0, &s2, 1) == 0); + TEST_CHECK(lsm_str_cmp(&s1, 1, &s2, 1) == 3); + TEST_CHECK(lsm_str_cmp(&s1, 1, &s2, 0) == 0); + + TEST_CHECK(lsm_str_cmp(&s1, 0, &s3, 0) == lsm_str_len(&s1)); +} + +void test_eq() { + lsm_str s1, s2; + lsm_str_overwrite_copy(&s1, "longerthan8"); + lsm_str_overwrite_copy(&s2, "longerthan8"); + + TEST_CHECK(lsm_str_eq(&s1, &s2)); + + lsm_str_overwrite_copy(&s1, "longerthan8"); + lsm_str_overwrite_copy(&s2, "lmaolongerthan8"); + + TEST_CHECK(!lsm_str_eq(&s1, &s2)); + + lsm_str_overwrite_copy(&s1, "short"); + lsm_str_overwrite_copy(&s2, "short"); + + TEST_CHECK(lsm_str_eq(&s1, &s2)); + + lsm_str_overwrite_copy(&s1, "short"); + lsm_str_overwrite_copy(&s1, "shorte"); + + TEST_CHECK(!lsm_str_eq(&s1, 
&s2)); + + lsm_str_overwrite_copy(&s1, "longerthan8"); + lsm_str_overwrite_copy(&s2, "short"); + + TEST_CHECK(!lsm_str_eq(&s1, &s2)); +} + +void test_substr() { + lsm_str s1, s2, s3; + lsm_str_overwrite_copy(&s1, "some_string"); + lsm_str_overwrite_copy(&s3, "string"); + lsm_str_substr(&s2, &s1, 5, lsm_str_len(&s1)); + + TEST_CHECK(lsm_str_eq(&s2, &s3)); + + lsm_str_zero(&s2); + lsm_str_substr(&s2, &s1, 25, lsm_str_len(&s1)); + + TEST_CHECK(lsm_str_len(&s2) == 0); +} + +void test_truncate() { + lsm_str s1, s2, s3; + lsm_str_overwrite_copy(&s1, "some_longer_string_thing"); + lsm_str_overwrite_copy(&s2, "some_longer_string"); + lsm_str_overwrite_copy(&s3, "some"); + + lsm_str_truncate(&s1, 18); + TEST_CHECK(lsm_str_eq(&s1, &s2)); + + lsm_str_truncate(&s1, 4); + TEST_CHECK(lsm_str_eq(&s1, &s3)); +} + +void test_init_copy() { + char orig[] = "some_string"; + lsm_str *s; + lsm_str_init_copy(&s, orig); + + TEST_CHECK(s->data.ptr != orig); + TEST_CHECK(strcmp(s->data.ptr, orig) == 0); + + lsm_str_free(s); +} + +TEST_LIST = { + { "str init_copy", test_init_copy }, + { "str cmp", test_cmp }, + { "str eq", test_eq }, + { "str substr", test_substr }, + { "str truncate", test_truncate }, + { NULL, NULL } +}; diff --git a/lsm/test/test.h b/lsm/test/test.h new file mode 100644 index 0000000..9ab8f88 --- /dev/null +++ b/lsm/test/test.h @@ -0,0 +1,1839 @@ +/* + * Acutest -- Another C/C++ Unit Test facility + * + * + * Copyright 2013-2020 Martin Mitas + * Copyright 2019 Garrett D'Amore + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef ACUTEST_H +#define ACUTEST_H + + +/************************ + *** Public interface *** + ************************/ + +/* By default, "acutest.h" provides the main program entry point (function + * main()). However, if the test suite is composed of multiple source files + * which include "acutest.h", then this causes a problem of multiple main() + * definitions. To avoid this problem, #define macro TEST_NO_MAIN in all + * compilation units but one. + */ + +/* Macro to specify list of unit tests in the suite. + * The unit test implementation MUST provide list of unit tests it implements + * with this macro: + * + * TEST_LIST = { + * { "test1_name", test1_func_ptr }, + * { "test2_name", test2_func_ptr }, + * ... + * { NULL, NULL } // zeroed record marking the end of the list + * }; + * + * The list specifies names of each test (must be unique) and pointer to + * a function implementing it. The function does not take any arguments + * and has no return values, i.e. every test function has to be compatible + * with this prototype: + * + * void test_func(void); + * + * Note the list has to be ended with a zeroed record. + */ +#define TEST_LIST const struct acutest_test_ acutest_list_[] + + +/* Macros for testing whether an unit test succeeds or fails. These macros + * can be used arbitrarily in functions implementing the unit tests. 
+ * + * If any condition fails throughout execution of a test, the test fails. + * + * TEST_CHECK takes only one argument (the condition), TEST_CHECK_ allows + * also to specify an error message to print out if the condition fails. + * (It expects printf-like format string and its parameters). The macros + * return non-zero (condition passes) or 0 (condition fails). + * + * That can be useful when more conditions should be checked only if some + * preceding condition passes, as illustrated in this code snippet: + * + * SomeStruct* ptr = allocate_some_struct(); + * if(TEST_CHECK(ptr != NULL)) { + * TEST_CHECK(ptr->member1 < 100); + * TEST_CHECK(ptr->member2 > 200); + * } + */ +#define TEST_CHECK_(cond,...) acutest_check_((cond), __FILE__, __LINE__, __VA_ARGS__) +#define TEST_CHECK(cond) acutest_check_((cond), __FILE__, __LINE__, "%s", #cond) + + +/* These macros are the same as TEST_CHECK_ and TEST_CHECK except that if the + * condition fails, the currently executed unit test is immediately aborted. + * + * That is done either by calling abort() if the unit test is executed as a + * child process; or via longjmp() if the unit test is executed within the + * main Acutest process. + * + * As a side effect of such abortion, your unit tests may cause memory leaks, + * unflushed file descriptors, and other phenomena caused by the abortion. + * + * Therefore you should not use these as a general replacement for TEST_CHECK. + * Use it with some caution, especially if your test causes some other side + * effects to the outside world (e.g. communicating with some server, inserting + * into a database etc.). + */ +#define TEST_ASSERT_(cond,...) 
\ + do { \ + if(!acutest_check_((cond), __FILE__, __LINE__, __VA_ARGS__)) \ + acutest_abort_(); \ + } while(0) +#define TEST_ASSERT(cond) \ + do { \ + if(!acutest_check_((cond), __FILE__, __LINE__, "%s", #cond)) \ + acutest_abort_(); \ + } while(0) + + +#ifdef __cplusplus +/* Macros to verify that the code (the 1st argument) throws exception of given + * type (the 2nd argument). (Note these macros are only available in C++.) + * + * TEST_EXCEPTION_ is like TEST_EXCEPTION but accepts custom printf-like + * message. + * + * For example: + * + * TEST_EXCEPTION(function_that_throw(), ExpectedExceptionType); + * + * If the function_that_throw() throws ExpectedExceptionType, the check passes. + * If the function throws anything incompatible with ExpectedExceptionType + * (or if it does not thrown an exception at all), the check fails. + */ +#define TEST_EXCEPTION(code, exctype) \ + do { \ + bool exc_ok_ = false; \ + const char *msg_ = NULL; \ + try { \ + code; \ + msg_ = "No exception thrown."; \ + } catch(exctype const&) { \ + exc_ok_= true; \ + } catch(...) { \ + msg_ = "Unexpected exception thrown."; \ + } \ + acutest_check_(exc_ok_, __FILE__, __LINE__, #code " throws " #exctype);\ + if(msg_ != NULL) \ + acutest_message_("%s", msg_); \ + } while(0) +#define TEST_EXCEPTION_(code, exctype, ...) \ + do { \ + bool exc_ok_ = false; \ + const char *msg_ = NULL; \ + try { \ + code; \ + msg_ = "No exception thrown."; \ + } catch(exctype const&) { \ + exc_ok_= true; \ + } catch(...) { \ + msg_ = "Unexpected exception thrown."; \ + } \ + acutest_check_(exc_ok_, __FILE__, __LINE__, __VA_ARGS__); \ + if(msg_ != NULL) \ + acutest_message_("%s", msg_); \ + } while(0) +#endif /* #ifdef __cplusplus */ + + +/* Sometimes it is useful to split execution of more complex unit tests to some + * smaller parts and associate those parts with some names. + * + * This is especially handy if the given unit test is implemented as a loop + * over some vector of multiple testing inputs. 
Using these macros allow to use + * sort of subtitle for each iteration of the loop (e.g. outputting the input + * itself or a name associated to it), so that if any TEST_CHECK condition + * fails in the loop, it can be easily seen which iteration triggers the + * failure, without the need to manually output the iteration-specific data in + * every single TEST_CHECK inside the loop body. + * + * TEST_CASE allows to specify only single string as the name of the case, + * TEST_CASE_ provides all the power of printf-like string formatting. + * + * Note that the test cases cannot be nested. Starting a new test case ends + * implicitly the previous one. To end the test case explicitly (e.g. to end + * the last test case after exiting the loop), you may use TEST_CASE(NULL). + */ +#define TEST_CASE_(...) acutest_case_(__VA_ARGS__) +#define TEST_CASE(name) acutest_case_("%s", name) + + +/* Maximal output per TEST_CASE call. Longer messages are cut. + * You may define another limit prior including "acutest.h" + */ +#ifndef TEST_CASE_MAXSIZE +#define TEST_CASE_MAXSIZE 64 +#endif + + +/* printf-like macro for outputting an extra information about a failure. + * + * Intended use is to output some computed output versus the expected value, + * e.g. like this: + * + * if(!TEST_CHECK(produced == expected)) { + * TEST_MSG("Expected: %d", expected); + * TEST_MSG("Produced: %d", produced); + * } + * + * Note the message is only written down if the most recent use of any checking + * macro (like e.g. TEST_CHECK or TEST_EXCEPTION) in the current test failed. + * This means the above is equivalent to just this: + * + * TEST_CHECK(produced == expected); + * TEST_MSG("Expected: %d", expected); + * TEST_MSG("Produced: %d", produced); + * + * The macro can deal with multi-line output fairly well. It also automatically + * adds a final new-line if there is none present. + */ +#define TEST_MSG(...) acutest_message_(__VA_ARGS__) + + +/* Maximal output per TEST_MSG call. 
Longer messages are cut. + * You may define another limit prior including "acutest.h" + */ +#ifndef TEST_MSG_MAXSIZE +#define TEST_MSG_MAXSIZE 1024 +#endif + + +/* Macro for dumping a block of memory. + * + * Its intended use is very similar to what TEST_MSG is for, but instead of + * generating any printf-like message, this is for dumping raw block of a + * memory in a hexadecimal form: + * + * TEST_CHECK(size_produced == size_expected && + * memcmp(addr_produced, addr_expected, size_produced) == 0); + * TEST_DUMP("Expected:", addr_expected, size_expected); + * TEST_DUMP("Produced:", addr_produced, size_produced); + */ +#define TEST_DUMP(title, addr, size) acutest_dump_(title, addr, size) + +/* Maximal output per TEST_DUMP call (in bytes to dump). Longer blocks are cut. + * You may define another limit prior including "acutest.h" + */ +#ifndef TEST_DUMP_MAXSIZE +#define TEST_DUMP_MAXSIZE 1024 +#endif + + +/* Common test initialiation/clean-up + * + * In some test suites, it may be needed to perform some sort of the same + * initialization and/or clean-up in all the tests. + * + * Such test suites may use macros TEST_INIT and/or TEST_FINI prior including + * this header. The expansion of the macro is then used as a body of helper + * function called just before executing every single (TEST_INIT) or just after + * it ends (TEST_FINI). + * + * Examples of various ways how to use the macro TEST_INIT: + * + * #define TEST_INIT my_init_func(); + * #define TEST_INIT my_init_func() // Works even without the semicolon + * #define TEST_INIT setlocale(LC_ALL, NULL); + * #define TEST_INIT { setlocale(LC_ALL, NULL); my_init_func(); } + * + * TEST_FINI is to be used in the same way. + */ + + +/********************** + *** Implementation *** + **********************/ + +/* The unit test files should not rely on anything below. 
*/ + +#include +#include +#include +#include +#include +#include + +#if defined(unix) || defined(__unix__) || defined(__unix) || defined(__APPLE__) +#define ACUTEST_UNIX_ 1 +#include +#include +#include +#include +#include +#include +#include + +#if defined CLOCK_PROCESS_CPUTIME_ID && defined CLOCK_MONOTONIC +#define ACUTEST_HAS_POSIX_TIMER_ 1 +#endif +#endif + +#if defined(_gnu_linux_) || defined(__linux__) +#define ACUTEST_LINUX_ 1 +#include +#include +#endif + +#if defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__) +#define ACUTEST_WIN_ 1 + #include + #include +#endif + +#if defined(__APPLE__) +#define ACUTEST_MACOS_ + #include + #include + #include + #include + #include +#endif + +#ifdef __cplusplus +#include +#endif + +#ifdef __has_include +#if __has_include() +#include +#endif +#endif + +/* Enable the use of the non-standard keyword __attribute__ to silence warnings under some compilers */ +#if defined(__GNUC__) || defined(__clang__) +#define ACUTEST_ATTRIBUTE_(attr) __attribute__((attr)) +#else +#define ACUTEST_ATTRIBUTE_(attr) +#endif + +/* Note our global private identifiers end with '_' to mitigate risk of clash + * with the unit tests implementation. */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _MSC_VER +/* In the multi-platform code like ours, we cannot use the non-standard + * "safe" functions from Microsoft C lib like e.g. sprintf_s() instead of + * standard sprintf(). Hence, lets disable the warning C4996. 
*/ + #pragma warning(push) + #pragma warning(disable: 4996) +#endif + + +struct acutest_test_ { + const char* name; + void (*func)(void); +}; + +struct acutest_test_data_ { + unsigned char flags; + double duration; +}; + +enum { + ACUTEST_FLAG_RUN_ = 1 << 0, + ACUTEST_FLAG_SUCCESS_ = 1 << 1, + ACUTEST_FLAG_FAILURE_ = 1 << 2, +}; + +extern const struct acutest_test_ acutest_list_[]; + +int acutest_check_(int cond, const char* file, int line, const char* fmt, ...); +void acutest_case_(const char* fmt, ...); +void acutest_message_(const char* fmt, ...); +void acutest_dump_(const char* title, const void* addr, size_t size); +void acutest_abort_(void) ACUTEST_ATTRIBUTE_(noreturn); + + +#ifndef TEST_NO_MAIN + +static char* acutest_argv0_ = NULL; +static size_t acutest_list_size_ = 0; +static struct acutest_test_data_* acutest_test_data_ = NULL; +static size_t acutest_count_ = 0; +static int acutest_no_exec_ = -1; +static int acutest_no_summary_ = 0; +static int acutest_tap_ = 0; +static int acutest_skip_mode_ = 0; +static int acutest_worker_ = 0; +static int acutest_worker_index_ = 0; +static int acutest_cond_failed_ = 0; +static int acutest_was_aborted_ = 0; +static FILE *acutest_xml_output_ = NULL; + +static int acutest_stat_failed_units_ = 0; +static int acutest_stat_run_units_ = 0; + +static const struct acutest_test_* acutest_current_test_ = NULL; +static int acutest_current_index_ = 0; +static char acutest_case_name_[TEST_CASE_MAXSIZE] = ""; +static int acutest_test_already_logged_ = 0; +static int acutest_case_already_logged_ = 0; +static int acutest_verbose_level_ = 2; +static int acutest_test_failures_ = 0; +static int acutest_colorize_ = 0; +static int acutest_timer_ = 0; + +static int acutest_abort_has_jmp_buf_ = 0; +static jmp_buf acutest_abort_jmp_buf_; + + +static void +acutest_cleanup_(void) +{ + free((void*) acutest_test_data_); +} + +static void ACUTEST_ATTRIBUTE_(noreturn) +acutest_exit_(int exit_code) +{ + acutest_cleanup_(); + exit(exit_code); +} + 
+/* Timer back-ends: each branch provides acutest_timer_type_, the start/end
+ * time stamps and the init/get_time/diff/print_diff helpers. */
+#if defined ACUTEST_WIN_
+typedef LARGE_INTEGER acutest_timer_type_;
+    static LARGE_INTEGER acutest_timer_freq_;
+    static acutest_timer_type_ acutest_timer_start_;
+    static acutest_timer_type_ acutest_timer_end_;
+
+    static void
+    acutest_timer_init_(void)
+    {
+        /* Counter ticks per second; used by acutest_timer_diff_ to convert
+         * tick differences into seconds. */
+        QueryPerformanceFrequency(&acutest_timer_freq_);
+    }
+
+    static void
+    acutest_timer_get_time_(LARGE_INTEGER* ts)
+    {
+        QueryPerformanceCounter(ts);
+    }
+
+    static double
+    acutest_timer_diff_(LARGE_INTEGER start, LARGE_INTEGER end)
+    {
+        double duration = (double)(end.QuadPart - start.QuadPart);
+        duration /= (double)acutest_timer_freq_.QuadPart;
+        return duration;
+    }
+
+    static void
+    acutest_timer_print_diff_(void)
+    {
+        printf("%.6lf secs", acutest_timer_diff_(acutest_timer_start_, acutest_timer_end_));
+    }
+#elif defined ACUTEST_HAS_POSIX_TIMER_
+static clockid_t acutest_timer_id_;
+typedef struct timespec acutest_timer_type_;
+static acutest_timer_type_ acutest_timer_start_;
+static acutest_timer_type_ acutest_timer_end_;
+
+static void
+acutest_timer_init_(void)
+{
+    /* acutest_timer_ selects the clock: 1 = monotonic time, 2 = process CPU
+     * time. */
+    if(acutest_timer_ == 1)
+        acutest_timer_id_ = CLOCK_MONOTONIC;
+    else if(acutest_timer_ == 2)
+        acutest_timer_id_ = CLOCK_PROCESS_CPUTIME_ID;
+}
+
+static void
+acutest_timer_get_time_(struct timespec* ts)
+{
+    clock_gettime(acutest_timer_id_, ts);
+}
+
+static double
+acutest_timer_diff_(struct timespec start, struct timespec end)
+{
+    double endns;
+    double startns;
+
+    /* Convert both time stamps to nanoseconds, then return the difference in
+     * seconds. */
+    endns = end.tv_sec;
+    endns *= 1e9;
+    endns += end.tv_nsec;
+
+    startns = start.tv_sec;
+    startns *= 1e9;
+    startns += start.tv_nsec;
+
+    return ((endns - startns)/ 1e9);
+}
+
+static void
+acutest_timer_print_diff_(void)
+{
+    printf("%.6lf secs",
+           acutest_timer_diff_(acutest_timer_start_, acutest_timer_end_));
+}
+#else
+/* No timer available: the helpers below do nothing. */
+typedef int acutest_timer_type_;
+    static acutest_timer_type_ acutest_timer_start_;
+    static acutest_timer_type_ acutest_timer_end_;
+
+    void
+    acutest_timer_init_(void)
+    {}
+
+    static void
+    acutest_timer_get_time_(int* ts)
+    {
+        (void) ts;
+    }
+
+
static double + acutest_timer_diff_(int start, int end) + { + (void) start; + (void) end; + return 0.0; + } + + static void + acutest_timer_print_diff_(void) + {} +#endif + +#define ACUTEST_COLOR_DEFAULT_ 0 +#define ACUTEST_COLOR_GREEN_ 1 +#define ACUTEST_COLOR_RED_ 2 +#define ACUTEST_COLOR_DEFAULT_INTENSIVE_ 3 +#define ACUTEST_COLOR_GREEN_INTENSIVE_ 4 +#define ACUTEST_COLOR_RED_INTENSIVE_ 5 + +static int ACUTEST_ATTRIBUTE_(format (printf, 2, 3)) +acutest_colored_printf_(int color, const char* fmt, ...) +{ + va_list args; + char buffer[256]; + int n; + + va_start(args, fmt); + vsnprintf(buffer, sizeof(buffer), fmt, args); + va_end(args); + buffer[sizeof(buffer)-1] = '\0'; + + if(!acutest_colorize_) { + return printf("%s", buffer); + } + +#if defined ACUTEST_UNIX_ + { + const char* col_str; + switch(color) { + case ACUTEST_COLOR_GREEN_: col_str = "\033[0;32m"; break; + case ACUTEST_COLOR_RED_: col_str = "\033[0;31m"; break; + case ACUTEST_COLOR_GREEN_INTENSIVE_: col_str = "\033[1;32m"; break; + case ACUTEST_COLOR_RED_INTENSIVE_: col_str = "\033[1;31m"; break; + case ACUTEST_COLOR_DEFAULT_INTENSIVE_: col_str = "\033[1m"; break; + default: col_str = "\033[0m"; break; + } + printf("%s", col_str); + n = printf("%s", buffer); + printf("\033[0m"); + return n; + } +#elif defined ACUTEST_WIN_ + { + HANDLE h; + CONSOLE_SCREEN_BUFFER_INFO info; + WORD attr; + + h = GetStdHandle(STD_OUTPUT_HANDLE); + GetConsoleScreenBufferInfo(h, &info); + + switch(color) { + case ACUTEST_COLOR_GREEN_: attr = FOREGROUND_GREEN; break; + case ACUTEST_COLOR_RED_: attr = FOREGROUND_RED; break; + case ACUTEST_COLOR_GREEN_INTENSIVE_: attr = FOREGROUND_GREEN | FOREGROUND_INTENSITY; break; + case ACUTEST_COLOR_RED_INTENSIVE_: attr = FOREGROUND_RED | FOREGROUND_INTENSITY; break; + case ACUTEST_COLOR_DEFAULT_INTENSIVE_: attr = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_INTENSITY; break; + default: attr = 0; break; + } + if(attr != 0) + SetConsoleTextAttribute(h, attr); + n = 
printf("%s", buffer); + SetConsoleTextAttribute(h, info.wAttributes); + return n; + } +#else + n = printf("%s", buffer); + return n; +#endif +} + +static void +acutest_begin_test_line_(const struct acutest_test_* test) +{ + if(!acutest_tap_) { + if(acutest_verbose_level_ >= 3) { + acutest_colored_printf_(ACUTEST_COLOR_DEFAULT_INTENSIVE_, "Test %s:\n", test->name); + acutest_test_already_logged_++; + } else if(acutest_verbose_level_ >= 1) { + int n; + char spaces[48]; + + n = acutest_colored_printf_(ACUTEST_COLOR_DEFAULT_INTENSIVE_, "Test %s... ", test->name); + memset(spaces, ' ', sizeof(spaces)); + if(n < (int) sizeof(spaces)) + printf("%.*s", (int) sizeof(spaces) - n, spaces); + } else { + acutest_test_already_logged_ = 1; + } + } +} + +static void +acutest_finish_test_line_(int result) +{ + if(acutest_tap_) { + const char* str = (result == 0) ? "ok" : "not ok"; + + printf("%s %d - %s\n", str, acutest_current_index_ + 1, acutest_current_test_->name); + + if(result == 0 && acutest_timer_) { + printf("# Duration: "); + acutest_timer_print_diff_(); + printf("\n"); + } + } else { + int color = (result == 0) ? ACUTEST_COLOR_GREEN_INTENSIVE_ : ACUTEST_COLOR_RED_INTENSIVE_; + const char* str = (result == 0) ? "OK" : "FAILED"; + printf("[ "); + acutest_colored_printf_(color, "%s", str); + printf(" ]"); + + if(result == 0 && acutest_timer_) { + printf(" "); + acutest_timer_print_diff_(); + } + + printf("\n"); + } +} + +static void +acutest_line_indent_(int level) +{ + static const char spaces[] = " "; + int n = level * 2; + + if(acutest_tap_ && n > 0) { + n--; + printf("#"); + } + + while(n > 16) { + printf("%s", spaces); + n -= 16; + } + printf("%.*s", n, spaces); +} + +int ACUTEST_ATTRIBUTE_(format (printf, 4, 5)) +acutest_check_(int cond, const char* file, int line, const char* fmt, ...) 
+{ + const char *result_str; + int result_color; + int verbose_level; + + if(cond) { + result_str = "ok"; + result_color = ACUTEST_COLOR_GREEN_; + verbose_level = 3; + } else { + if(!acutest_test_already_logged_ && acutest_current_test_ != NULL) + acutest_finish_test_line_(-1); + + result_str = "failed"; + result_color = ACUTEST_COLOR_RED_; + verbose_level = 2; + acutest_test_failures_++; + acutest_test_already_logged_++; + } + + if(acutest_verbose_level_ >= verbose_level) { + va_list args; + + if(!acutest_case_already_logged_ && acutest_case_name_[0]) { + acutest_line_indent_(1); + acutest_colored_printf_(ACUTEST_COLOR_DEFAULT_INTENSIVE_, "Case %s:\n", acutest_case_name_); + acutest_test_already_logged_++; + acutest_case_already_logged_++; + } + + acutest_line_indent_(acutest_case_name_[0] ? 2 : 1); + if(file != NULL) { +#ifdef ACUTEST_WIN_ + const char* lastsep1 = strrchr(file, '\\'); + const char* lastsep2 = strrchr(file, '/'); + if(lastsep1 == NULL) + lastsep1 = file-1; + if(lastsep2 == NULL) + lastsep2 = file-1; + file = (lastsep1 > lastsep2 ? lastsep1 : lastsep2) + 1; +#else + const char* lastsep = strrchr(file, '/'); + if(lastsep != NULL) + file = lastsep+1; +#endif + printf("%s:%d: Check ", file, line); + } + + va_start(args, fmt); + vprintf(fmt, args); + va_end(args); + + printf("... "); + acutest_colored_printf_(result_color, "%s", result_str); + printf("\n"); + acutest_test_already_logged_++; + } + + acutest_cond_failed_ = (cond == 0); + return !acutest_cond_failed_; +} + +void ACUTEST_ATTRIBUTE_(format (printf, 1, 2)) +acutest_case_(const char* fmt, ...) 
+{ + va_list args; + + if(acutest_verbose_level_ < 2) + return; + + if(acutest_case_name_[0]) { + acutest_case_already_logged_ = 0; + acutest_case_name_[0] = '\0'; + } + + if(fmt == NULL) + return; + + va_start(args, fmt); + vsnprintf(acutest_case_name_, sizeof(acutest_case_name_) - 1, fmt, args); + va_end(args); + acutest_case_name_[sizeof(acutest_case_name_) - 1] = '\0'; + + if(acutest_verbose_level_ >= 3) { + acutest_line_indent_(1); + acutest_colored_printf_(ACUTEST_COLOR_DEFAULT_INTENSIVE_, "Case %s:\n", acutest_case_name_); + acutest_test_already_logged_++; + acutest_case_already_logged_++; + } +} + +void ACUTEST_ATTRIBUTE_(format (printf, 1, 2)) +acutest_message_(const char* fmt, ...) +{ + char buffer[TEST_MSG_MAXSIZE]; + char* line_beg; + char* line_end; + va_list args; + + if(acutest_verbose_level_ < 2) + return; + + /* We allow extra message only when something is already wrong in the + * current test. */ + if(acutest_current_test_ == NULL || !acutest_cond_failed_) + return; + + va_start(args, fmt); + vsnprintf(buffer, TEST_MSG_MAXSIZE, fmt, args); + va_end(args); + buffer[TEST_MSG_MAXSIZE-1] = '\0'; + + line_beg = buffer; + while(1) { + line_end = strchr(line_beg, '\n'); + if(line_end == NULL) + break; + acutest_line_indent_(acutest_case_name_[0] ? 3 : 2); + printf("%.*s\n", (int)(line_end - line_beg), line_beg); + line_beg = line_end + 1; + } + if(line_beg[0] != '\0') { + acutest_line_indent_(acutest_case_name_[0] ? 3 : 2); + printf("%s\n", line_beg); + } +} + +void +acutest_dump_(const char* title, const void* addr, size_t size) +{ + static const size_t BYTES_PER_LINE = 16; + size_t line_beg; + size_t truncate = 0; + + if(acutest_verbose_level_ < 2) + return; + + /* We allow extra message only when something is already wrong in the + * current test. 
*/ + if(acutest_current_test_ == NULL || !acutest_cond_failed_) + return; + + if(size > TEST_DUMP_MAXSIZE) { + truncate = size - TEST_DUMP_MAXSIZE; + size = TEST_DUMP_MAXSIZE; + } + + acutest_line_indent_(acutest_case_name_[0] ? 3 : 2); + printf((title[strlen(title)-1] == ':') ? "%s\n" : "%s:\n", title); + + for(line_beg = 0; line_beg < size; line_beg += BYTES_PER_LINE) { + size_t line_end = line_beg + BYTES_PER_LINE; + size_t off; + + acutest_line_indent_(acutest_case_name_[0] ? 4 : 3); + printf("%08lx: ", (unsigned long)line_beg); + for(off = line_beg; off < line_end; off++) { + if(off < size) + printf(" %02x", ((const unsigned char*)addr)[off]); + else + printf(" "); + } + + printf(" "); + for(off = line_beg; off < line_end; off++) { + unsigned char byte = ((const unsigned char*)addr)[off]; + if(off < size) + printf("%c", (iscntrl(byte) ? '.' : byte)); + else + break; + } + + printf("\n"); + } + + if(truncate > 0) { + acutest_line_indent_(acutest_case_name_[0] ? 4 : 3); + printf(" ... (and more %u bytes)\n", (unsigned) truncate); + } +} + +/* This is called just before each test */ +static void +acutest_init_(const char *test_name) +{ +#ifdef TEST_INIT + TEST_INIT + ; /* Allow for a single unterminated function call */ +#endif + + /* Suppress any warnings about unused variable. */ + (void) test_name; +} + +/* This is called after each test */ +static void +acutest_fini_(const char *test_name) +{ +#ifdef TEST_FINI + TEST_FINI + ; /* Allow for a single unterminated function call */ +#endif + + /* Suppress any warnings about unused variable. 
*/
+    (void) test_name;
+}
+
+void
+acutest_abort_(void)  /* Leave the running test: longjmp when the jump buffer is armed, abort() otherwise. */
+{
+    if(acutest_abort_has_jmp_buf_) {
+        longjmp(acutest_abort_jmp_buf_, 1);
+    } else {
+        if(acutest_current_test_ != NULL)
+            acutest_fini_(acutest_current_test_->name);
+        abort();
+    }
+}
+
+static void
+acutest_list_names_(void)  /* Print the name of every entry of acutest_list_. */
+{
+    const struct acutest_test_* test;
+
+    printf("Unit tests:\n");
+    for(test = &acutest_list_[0]; test->func != NULL; test++)
+        printf(" %s\n", test->name);
+}
+
+static void
+acutest_remember_(int i)  /* Flag test i to be run; each test is counted once. */
+{
+    if(acutest_test_data_[i].flags & ACUTEST_FLAG_RUN_)
+        return;
+
+    acutest_test_data_[i].flags |= ACUTEST_FLAG_RUN_;
+    acutest_count_++;
+}
+
+static void
+acutest_set_success_(int i, int success)  /* Record the outcome of test i in its flags. */
+{
+    acutest_test_data_[i].flags |= success ? ACUTEST_FLAG_SUCCESS_ : ACUTEST_FLAG_FAILURE_;
+}
+
+static void
+acutest_set_duration_(int i, double duration)  /* Record the duration of test i. */
+{
+    acutest_test_data_[i].duration = duration;
+}
+
+static int
+acutest_name_contains_word_(const char* name, const char* pattern)  /* 1 if pattern occurs in name delimited by word_delim or the string ends, else 0. */
+{
+    static const char word_delim[] = " \t-_/.,:;";
+    const char* substr;
+    size_t pattern_len;
+
+    pattern_len = strlen(pattern);
+
+    substr = strstr(name, pattern);
+    while(substr != NULL) {
+        int starts_on_word_boundary = (substr == name || strchr(word_delim, substr[-1]) != NULL);
+        int ends_on_word_boundary = (substr[pattern_len] == '\0' || strchr(word_delim, substr[pattern_len]) != NULL);
+
+        if(starts_on_word_boundary && ends_on_word_boundary)
+            return 1;
+
+        substr = (substr[0] != '\0') ? strstr(substr+1, pattern) : NULL;  /* an empty pattern also matches at the terminator: never search past it */
+    }
+
+    return 0;
+}
+
+static int
+acutest_lookup_(const char* pattern)
+{
+    int i;
+    int n = 0;
+
+    /* Try exact match. */
+    for(i = 0; i < (int) acutest_list_size_; i++) {
+        if(strcmp(acutest_list_[i].name, pattern) == 0) {
+            acutest_remember_(i);
+            n++;
+            break;
+        }
+    }
+    if(n > 0)
+        return n;
+
+    /* Try word match.
*/ + for(i = 0; i < (int) acutest_list_size_; i++) { + if(acutest_name_contains_word_(acutest_list_[i].name, pattern)) { + acutest_remember_(i); + n++; + } + } + if(n > 0) + return n; + + /* Try relaxed match. */ + for(i = 0; i < (int) acutest_list_size_; i++) { + if(strstr(acutest_list_[i].name, pattern) != NULL) { + acutest_remember_(i); + n++; + } + } + + return n; +} + + +/* Called if anything goes bad in Acutest, or if the unit test ends in other + * way then by normal returning from its function (e.g. exception or some + * abnormal child process termination). */ +static void ACUTEST_ATTRIBUTE_(format (printf, 1, 2)) +acutest_error_(const char* fmt, ...) +{ + if(acutest_verbose_level_ == 0) + return; + + if(acutest_verbose_level_ >= 2) { + va_list args; + + acutest_line_indent_(1); + if(acutest_verbose_level_ >= 3) + acutest_colored_printf_(ACUTEST_COLOR_RED_INTENSIVE_, "ERROR: "); + va_start(args, fmt); + vprintf(fmt, args); + va_end(args); + printf("\n"); + } + + if(acutest_verbose_level_ >= 3) { + printf("\n"); + } +} + +/* Call directly the given test unit function. */ +static int +acutest_do_run_(const struct acutest_test_* test, int index) +{ + int status = -1; + + acutest_was_aborted_ = 0; + acutest_current_test_ = test; + acutest_current_index_ = index; + acutest_test_failures_ = 0; + acutest_test_already_logged_ = 0; + acutest_cond_failed_ = 0; + +#ifdef __cplusplus + try { +#endif + acutest_init_(test->name); + acutest_begin_test_line_(test); + + /* This is good to do in case the test unit crashes. 
*/
+        fflush(stdout);
+        fflush(stderr);
+
+        if(!acutest_worker_) {  /* without the jump buffer acutest_abort_() falls back to abort() */
+            acutest_abort_has_jmp_buf_ = 1;
+            if(setjmp(acutest_abort_jmp_buf_) != 0) {  /* non-zero: acutest_abort_() jumped back here */
+                acutest_was_aborted_ = 1;
+                goto aborted;
+            }
+        }
+
+        acutest_timer_get_time_(&acutest_timer_start_);
+        test->func();
+    aborted:
+        acutest_abort_has_jmp_buf_ = 0;
+        acutest_timer_get_time_(&acutest_timer_end_);
+
+        if(acutest_verbose_level_ >= 3) {
+            acutest_line_indent_(1);
+            if(acutest_test_failures_ == 0) {
+                acutest_colored_printf_(ACUTEST_COLOR_GREEN_INTENSIVE_, "SUCCESS: ");
+                printf("All conditions have passed.\n");
+
+                if(acutest_timer_) {
+                    acutest_line_indent_(1);
+                    printf("Duration: ");
+                    acutest_timer_print_diff_();
+                    printf("\n");
+                }
+            } else {
+                acutest_colored_printf_(ACUTEST_COLOR_RED_INTENSIVE_, "FAILED: ");
+                if(!acutest_was_aborted_) {
+                    printf("%d condition%s %s failed.\n",
+                            acutest_test_failures_,
+                            (acutest_test_failures_ == 1) ? "" : "s",
+                            (acutest_test_failures_ == 1) ? "has" : "have");
+                } else {
+                    printf("Aborted.\n");
+                }
+            }
+            printf("\n");
+        } else if(acutest_verbose_level_ >= 1 && acutest_test_failures_ == 0) {
+            acutest_finish_test_line_(0);
+        }
+
+        status = (acutest_test_failures_ == 0) ? 0 : -1;  /* 0 only when no check failed */
+
+#ifdef __cplusplus
+    } catch(std::exception& e) {
+        const char* what = e.what();
+        acutest_check_(0, NULL, 0, "Threw std::exception");
+        if(what != NULL)
+            acutest_message_("std::exception::what(): %s", what);
+
+        if(acutest_verbose_level_ >= 3) {
+            acutest_line_indent_(1);
+            acutest_colored_printf_(ACUTEST_COLOR_RED_INTENSIVE_, "FAILED: ");
+            printf("C++ exception.\n\n");
+        }
+    } catch(...) {
+        acutest_check_(0, NULL, 0, "Threw an exception");
+
+        if(acutest_verbose_level_ >= 3) {
+            acutest_line_indent_(1);
+            acutest_colored_printf_(ACUTEST_COLOR_RED_INTENSIVE_, "FAILED: ");
+            printf("C++ exception.\n\n");
+        }
+    }
+#endif
+
+    acutest_fini_(test->name);
+    acutest_case_(NULL);  /* clear the case name left by the test */
+    acutest_current_test_ = NULL;
+
+    return status;
+}
+
+/* Trigger the unit test.
If possible (and not suppressed) it starts a child + * process who calls acutest_do_run_(), otherwise it calls acutest_do_run_() + * directly. */ +static void +acutest_run_(const struct acutest_test_* test, int index, int master_index) +{ + int failed = 1; + acutest_timer_type_ start, end; + + acutest_current_test_ = test; + acutest_test_already_logged_ = 0; + acutest_timer_get_time_(&start); + + if(!acutest_no_exec_) { + +#if defined(ACUTEST_UNIX_) + + pid_t pid; + int exit_code; + + /* Make sure the child starts with empty I/O buffers. */ + fflush(stdout); + fflush(stderr); + + pid = fork(); + if(pid == (pid_t)-1) { + acutest_error_("Cannot fork. %s [%d]", strerror(errno), errno); + failed = 1; + } else if(pid == 0) { + /* Child: Do the test. */ + acutest_worker_ = 1; + failed = (acutest_do_run_(test, index) != 0); + acutest_exit_(failed ? 1 : 0); + } else { + /* Parent: Wait until child terminates and analyze its exit code. */ + waitpid(pid, &exit_code, 0); + if(WIFEXITED(exit_code)) { + switch(WEXITSTATUS(exit_code)) { + case 0: failed = 0; break; /* test has passed. */ + case 1: /* noop */ break; /* "normal" failure. 
*/ + default: acutest_error_("Unexpected exit code [%d]", WEXITSTATUS(exit_code)); + } + } else if(WIFSIGNALED(exit_code)) { + char tmp[32]; + const char* signame; + switch(WTERMSIG(exit_code)) { + case SIGINT: signame = "SIGINT"; break; + case SIGHUP: signame = "SIGHUP"; break; + case SIGQUIT: signame = "SIGQUIT"; break; + case SIGABRT: signame = "SIGABRT"; break; + case SIGKILL: signame = "SIGKILL"; break; + case SIGSEGV: signame = "SIGSEGV"; break; + case SIGILL: signame = "SIGILL"; break; + case SIGTERM: signame = "SIGTERM"; break; + default: sprintf(tmp, "signal %d", WTERMSIG(exit_code)); signame = tmp; break; + } + acutest_error_("Test interrupted by %s.", signame); + } else { + acutest_error_("Test ended in an unexpected way [%d].", exit_code); + } + } + +#elif defined(ACUTEST_WIN_) + + char buffer[512] = {0}; + STARTUPINFOA startupInfo; + PROCESS_INFORMATION processInfo; + DWORD exitCode; + + /* Windows has no fork(). So we propagate all info into the child + * through a command line arguments. */ + _snprintf(buffer, sizeof(buffer)-1, + "%s --worker=%d %s --no-exec --no-summary %s --verbose=%d --color=%s -- \"%s\"", + acutest_argv0_, index, acutest_timer_ ? "--time" : "", + acutest_tap_ ? "--tap" : "", acutest_verbose_level_, + acutest_colorize_ ? 
"always" : "never", + test->name); + memset(&startupInfo, 0, sizeof(startupInfo)); + startupInfo.cb = sizeof(STARTUPINFO); + if(CreateProcessA(NULL, buffer, NULL, NULL, FALSE, 0, NULL, NULL, &startupInfo, &processInfo)) { + WaitForSingleObject(processInfo.hProcess, INFINITE); + GetExitCodeProcess(processInfo.hProcess, &exitCode); + CloseHandle(processInfo.hThread); + CloseHandle(processInfo.hProcess); + failed = (exitCode != 0); + if(exitCode > 1) { + switch(exitCode) { + case 3: acutest_error_("Aborted."); break; + case 0xC0000005: acutest_error_("Access violation."); break; + default: acutest_error_("Test ended in an unexpected way [%lu].", exitCode); break; + } + } + } else { + acutest_error_("Cannot create unit test subprocess [%ld].", GetLastError()); + failed = 1; + } + +#else + + /* A platform where we don't know how to run child process. */ + failed = (acutest_do_run_(test, index) != 0); + +#endif + + } else { + /* Child processes suppressed through --no-exec. */ + failed = (acutest_do_run_(test, index) != 0); + } + acutest_timer_get_time_(&end); + + acutest_current_test_ = NULL; + + acutest_stat_run_units_++; + if(failed) + acutest_stat_failed_units_++; + + acutest_set_success_(master_index, !failed); + acutest_set_duration_(master_index, acutest_timer_diff_(start, end)); +} + +#if defined(ACUTEST_WIN_) +/* Callback for SEH events. 
*/ +static LONG CALLBACK +acutest_seh_exception_filter_(EXCEPTION_POINTERS *ptrs) +{ + acutest_check_(0, NULL, 0, "Unhandled SEH exception"); + acutest_message_("Exception code: 0x%08lx", ptrs->ExceptionRecord->ExceptionCode); + acutest_message_("Exception address: 0x%p", ptrs->ExceptionRecord->ExceptionAddress); + + fflush(stdout); + fflush(stderr); + + return EXCEPTION_EXECUTE_HANDLER; +} +#endif + + +#define ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ 0x0001 +#define ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_ 0x0002 + +#define ACUTEST_CMDLINE_OPTID_NONE_ 0 +#define ACUTEST_CMDLINE_OPTID_UNKNOWN_ (-0x7fffffff + 0) +#define ACUTEST_CMDLINE_OPTID_MISSINGARG_ (-0x7fffffff + 1) +#define ACUTEST_CMDLINE_OPTID_BOGUSARG_ (-0x7fffffff + 2) + +typedef struct acutest_test_CMDLINE_OPTION_ { + char shortname; + const char* longname; + int id; + unsigned flags; +} ACUTEST_CMDLINE_OPTION_; + +static int +acutest_cmdline_handle_short_opt_group_(const ACUTEST_CMDLINE_OPTION_* options, + const char* arggroup, + int (*callback)(int /*optval*/, const char* /*arg*/)) +{ + const ACUTEST_CMDLINE_OPTION_* opt; + int i; + int ret = 0; + + for(i = 0; arggroup[i] != '\0'; i++) { + for(opt = options; opt->id != 0; opt++) { + if(arggroup[i] == opt->shortname) + break; + } + + if(opt->id != 0 && !(opt->flags & ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_)) { + ret = callback(opt->id, NULL); + } else { + /* Unknown option. */ + char badoptname[3]; + badoptname[0] = '-'; + badoptname[1] = arggroup[i]; + badoptname[2] = '\0'; + ret = callback((opt->id != 0 ? 
ACUTEST_CMDLINE_OPTID_MISSINGARG_ : ACUTEST_CMDLINE_OPTID_UNKNOWN_), + badoptname); + } + + if(ret != 0) + break; + } + + return ret; +} + +#define ACUTEST_CMDLINE_AUXBUF_SIZE_ 32 + +static int +acutest_cmdline_read_(const ACUTEST_CMDLINE_OPTION_* options, int argc, char** argv, + int (*callback)(int /*optval*/, const char* /*arg*/)) +{ + + const ACUTEST_CMDLINE_OPTION_* opt; + char auxbuf[ACUTEST_CMDLINE_AUXBUF_SIZE_+1]; + int after_doubledash = 0; + int i = 1; + int ret = 0; + + auxbuf[ACUTEST_CMDLINE_AUXBUF_SIZE_] = '\0'; + + while(i < argc) { + if(after_doubledash || strcmp(argv[i], "-") == 0) { + /* Non-option argument. */ + ret = callback(ACUTEST_CMDLINE_OPTID_NONE_, argv[i]); + } else if(strcmp(argv[i], "--") == 0) { + /* End of options. All the remaining members are non-option arguments. */ + after_doubledash = 1; + } else if(argv[i][0] != '-') { + /* Non-option argument. */ + ret = callback(ACUTEST_CMDLINE_OPTID_NONE_, argv[i]); + } else { + for(opt = options; opt->id != 0; opt++) { + if(opt->longname != NULL && strncmp(argv[i], "--", 2) == 0) { + size_t len = strlen(opt->longname); + if(strncmp(argv[i]+2, opt->longname, len) == 0) { + /* Regular long option. */ + if(argv[i][2+len] == '\0') { + /* with no argument provided. */ + if(!(opt->flags & ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_)) + ret = callback(opt->id, NULL); + else + ret = callback(ACUTEST_CMDLINE_OPTID_MISSINGARG_, argv[i]); + break; + } else if(argv[i][2+len] == '=') { + /* with an argument provided. */ + if(opt->flags & (ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ | ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_)) { + ret = callback(opt->id, argv[i]+2+len+1); + } else { + sprintf(auxbuf, "--%s", opt->longname); + ret = callback(ACUTEST_CMDLINE_OPTID_BOGUSARG_, auxbuf); + } + break; + } else { + continue; + } + } + } else if(opt->shortname != '\0' && argv[i][0] == '-') { + if(argv[i][1] == opt->shortname) { + /* Regular short option. 
*/ + if(opt->flags & ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_) { + if(argv[i][2] != '\0') + ret = callback(opt->id, argv[i]+2); + else if(i+1 < argc) + ret = callback(opt->id, argv[++i]); + else + ret = callback(ACUTEST_CMDLINE_OPTID_MISSINGARG_, argv[i]); + break; + } else { + ret = callback(opt->id, NULL); + + /* There might be more (argument-less) short options + * grouped together. */ + if(ret == 0 && argv[i][2] != '\0') + ret = acutest_cmdline_handle_short_opt_group_(options, argv[i]+2, callback); + break; + } + } + } + } + + if(opt->id == 0) { /* still not handled? */ + if(argv[i][0] != '-') { + /* Non-option argument. */ + ret = callback(ACUTEST_CMDLINE_OPTID_NONE_, argv[i]); + } else { + /* Unknown option. */ + char* badoptname = argv[i]; + + if(strncmp(badoptname, "--", 2) == 0) { + /* Strip any argument from the long option. */ + char* assignment = strchr(badoptname, '='); + if(assignment != NULL) { + size_t len = assignment - badoptname; + if(len > ACUTEST_CMDLINE_AUXBUF_SIZE_) + len = ACUTEST_CMDLINE_AUXBUF_SIZE_; + strncpy(auxbuf, badoptname, len); + auxbuf[len] = '\0'; + badoptname = auxbuf; + } + } + + ret = callback(ACUTEST_CMDLINE_OPTID_UNKNOWN_, badoptname); + } + } + } + + if(ret != 0) + return ret; + i++; + } + + return ret; +} + +static void +acutest_help_(void) +{ + printf("Usage: %s [options] [test...]\n", acutest_argv0_); + printf("\n"); + printf("Run the specified unit tests; or if the option '--skip' is used, run all\n"); + printf("tests in the suite but those listed. 
By default, if no tests are specified\n"); + printf("on the command line, all unit tests in the suite are run.\n"); + printf("\n"); + printf("Options:\n"); + printf(" -s, --skip Execute all unit tests but the listed ones\n"); + printf(" --exec[=WHEN] If supported, execute unit tests as child processes\n"); + printf(" (WHEN is one of 'auto', 'always', 'never')\n"); + printf(" -E, --no-exec Same as --exec=never\n"); +#if defined ACUTEST_WIN_ + printf(" -t, --time Measure test duration\n"); +#elif defined ACUTEST_HAS_POSIX_TIMER_ + printf(" -t, --time Measure test duration (real time)\n"); + printf(" --time=TIMER Measure test duration, using given timer\n"); + printf(" (TIMER is one of 'real', 'cpu')\n"); +#endif + printf(" --no-summary Suppress printing of test results summary\n"); + printf(" --tap Produce TAP-compliant output\n"); + printf(" (See https://testanything.org/)\n"); + printf(" -x, --xml-output=FILE Enable XUnit output to the given file\n"); + printf(" -l, --list List unit tests in the suite and exit\n"); + printf(" -v, --verbose Make output more verbose\n"); + printf(" --verbose=LEVEL Set verbose level to LEVEL:\n"); + printf(" 0 ... Be silent\n"); + printf(" 1 ... Output one line per test (and summary)\n"); + printf(" 2 ... As 1 and failed conditions (this is default)\n"); + printf(" 3 ... 
As 1 and all conditions (and extended summary)\n"); + printf(" -q, --quiet Same as --verbose=0\n"); + printf(" --color[=WHEN] Enable colorized output\n"); + printf(" (WHEN is one of 'auto', 'always', 'never')\n"); + printf(" --no-color Same as --color=never\n"); + printf(" -h, --help Display this help and exit\n"); + + if(acutest_list_size_ < 16) { + printf("\n"); + acutest_list_names_(); + } +} + +static const ACUTEST_CMDLINE_OPTION_ acutest_cmdline_options_[] = { + { 's', "skip", 's', 0 }, + { 0, "exec", 'e', ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ }, + { 'E', "no-exec", 'E', 0 }, +#if defined ACUTEST_WIN_ + { 't', "time", 't', 0 }, + { 0, "timer", 't', 0 }, /* kept for compatibility */ +#elif defined ACUTEST_HAS_POSIX_TIMER_ + { 't', "time", 't', ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ }, + { 0, "timer", 't', ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ }, /* kept for compatibility */ +#endif + { 0, "no-summary", 'S', 0 }, + { 0, "tap", 'T', 0 }, + { 'l', "list", 'l', 0 }, + { 'v', "verbose", 'v', ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ }, + { 'q', "quiet", 'q', 0 }, + { 0, "color", 'c', ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ }, + { 0, "no-color", 'C', 0 }, + { 'h', "help", 'h', 0 }, + { 0, "worker", 'w', ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_ }, /* internal */ + { 'x', "xml-output", 'x', ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_ }, + { 0, NULL, 0, 0 } +}; + +static int +acutest_cmdline_callback_(int id, const char* arg) +{ + switch(id) { + case 's': + acutest_skip_mode_ = 1; + break; + + case 'e': + if(arg == NULL || strcmp(arg, "always") == 0) { + acutest_no_exec_ = 0; + } else if(strcmp(arg, "never") == 0) { + acutest_no_exec_ = 1; + } else if(strcmp(arg, "auto") == 0) { + /*noop*/ + } else { + fprintf(stderr, "%s: Unrecognized argument '%s' for option --exec.\n", acutest_argv0_, arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + } + break; + + case 'E': + acutest_no_exec_ = 1; + break; + + case 't': +#if defined ACUTEST_WIN_ || 
defined ACUTEST_HAS_POSIX_TIMER_ + if(arg == NULL || strcmp(arg, "real") == 0) { + acutest_timer_ = 1; +#ifndef ACUTEST_WIN_ + } else if(strcmp(arg, "cpu") == 0) { + acutest_timer_ = 2; +#endif + } else { + fprintf(stderr, "%s: Unrecognized argument '%s' for option --time.\n", acutest_argv0_, arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + } +#endif + break; + + case 'S': + acutest_no_summary_ = 1; + break; + + case 'T': + acutest_tap_ = 1; + break; + + case 'l': + acutest_list_names_(); + acutest_exit_(0); + break; + + case 'v': + acutest_verbose_level_ = (arg != NULL ? atoi(arg) : acutest_verbose_level_+1); + break; + + case 'q': + acutest_verbose_level_ = 0; + break; + + case 'c': + if(arg == NULL || strcmp(arg, "always") == 0) { + acutest_colorize_ = 1; + } else if(strcmp(arg, "never") == 0) { + acutest_colorize_ = 0; + } else if(strcmp(arg, "auto") == 0) { + /*noop*/ + } else { + fprintf(stderr, "%s: Unrecognized argument '%s' for option --color.\n", acutest_argv0_, arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + } + break; + + case 'C': + acutest_colorize_ = 0; + break; + + case 'h': + acutest_help_(); + acutest_exit_(0); + break; + + case 'w': + acutest_worker_ = 1; + acutest_worker_index_ = atoi(arg); + break; + case 'x': + acutest_xml_output_ = fopen(arg, "w"); + if (!acutest_xml_output_) { + fprintf(stderr, "Unable to open '%s': %s\n", arg, strerror(errno)); + acutest_exit_(2); + } + break; + + case 0: + if(acutest_lookup_(arg) == 0) { + fprintf(stderr, "%s: Unrecognized unit test '%s'\n", acutest_argv0_, arg); + fprintf(stderr, "Try '%s --list' for list of unit tests.\n", acutest_argv0_); + acutest_exit_(2); + } + break; + + case ACUTEST_CMDLINE_OPTID_UNKNOWN_: + fprintf(stderr, "Unrecognized command line option '%s'.\n", arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + break; + + 
case ACUTEST_CMDLINE_OPTID_MISSINGARG_: + fprintf(stderr, "The command line option '%s' requires an argument.\n", arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + break; + + case ACUTEST_CMDLINE_OPTID_BOGUSARG_: + fprintf(stderr, "The command line option '%s' does not expect an argument.\n", arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + break; + } + + return 0; +} + + +#ifdef ACUTEST_LINUX_ +static int +acutest_is_tracer_present_(void) +{ + /* Must be large enough so the line 'TracerPid: ${PID}' can fit in. */ + static const int OVERLAP = 32; + + char buf[512]; + int tracer_present = 0; + int fd; + size_t n_read = 0; + + fd = open("/proc/self/status", O_RDONLY); + if(fd == -1) + return 0; + + while(1) { + static const char pattern[] = "TracerPid:"; + const char* field; + + while(n_read < sizeof(buf) - 1) { + ssize_t n; + + n = read(fd, buf + n_read, sizeof(buf) - 1 - n_read); + if(n <= 0) + break; + n_read += n; + } + buf[n_read] = '\0'; + + field = strstr(buf, pattern); + if(field != NULL && field < buf + sizeof(buf) - OVERLAP) { + pid_t tracer_pid = (pid_t) atoi(field + sizeof(pattern) - 1); + tracer_present = (tracer_pid != 0); + break; + } + + if(n_read == sizeof(buf) - 1) { + /* Move the tail with the potentially incomplete line we're looking + * for to the beginning of the buffer. */ + memmove(buf, buf + sizeof(buf) - 1 - OVERLAP, OVERLAP); + n_read = OVERLAP; + } else { + break; + } + } + + close(fd); + return tracer_present; +} +#endif + +#ifdef ACUTEST_MACOS_ +static bool +acutest_AmIBeingDebugged(void) +{ + int junk; + int mib[4]; + struct kinfo_proc info; + size_t size; + + // Initialize the flags so that, if sysctl fails for some bizarre + // reason, we get a predictable result. 
+ info.kp_proc.p_flag = 0; + + // Initialize mib, which tells sysctl the info we want, in this case + // we're looking for information about a specific process ID. + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PID; + mib[3] = getpid(); + + // Call sysctl. + size = sizeof(info); + junk = sysctl(mib, sizeof(mib) / sizeof(*mib), &info, &size, NULL, 0); + assert(junk == 0); + + // We're being debugged if the P_TRACED flag is set. + return ( (info.kp_proc.p_flag & P_TRACED) != 0 ); +} +#endif + +int +main(int argc, char** argv) +{ + int i; + + acutest_argv0_ = argv[0]; + +#if defined ACUTEST_UNIX_ + acutest_colorize_ = isatty(STDOUT_FILENO); +#elif defined ACUTEST_WIN_ + #if defined _BORLANDC_ + acutest_colorize_ = isatty(_fileno(stdout)); + #else + acutest_colorize_ = _isatty(_fileno(stdout)); + #endif +#else + acutest_colorize_ = 0; +#endif + + /* Count all test units */ + acutest_list_size_ = 0; + for(i = 0; acutest_list_[i].func != NULL; i++) + acutest_list_size_++; + + acutest_test_data_ = (struct acutest_test_data_*)calloc(acutest_list_size_, sizeof(struct acutest_test_data_)); + if(acutest_test_data_ == NULL) { + fprintf(stderr, "Out of memory.\n"); + acutest_exit_(2); + } + + /* Parse options */ + acutest_cmdline_read_(acutest_cmdline_options_, argc, argv, acutest_cmdline_callback_); + + /* Initialize the proper timer. */ + acutest_timer_init_(); + +#if defined(ACUTEST_WIN_) + SetUnhandledExceptionFilter(acutest_seh_exception_filter_); +#ifdef _MSC_VER + _set_abort_behavior(0, _WRITE_ABORT_MSG); +#endif +#endif + + /* By default, we want to run all tests. */ + if(acutest_count_ == 0) { + for(i = 0; acutest_list_[i].func != NULL; i++) + acutest_remember_(i); + } + + /* Guess whether we want to run unit tests as child processes. 
*/
+    if(acutest_no_exec_ < 0) {
+        acutest_no_exec_ = 0;
+
+        if(acutest_count_ <= 1) {
+            acutest_no_exec_ = 1;
+        } else {
+#ifdef ACUTEST_WIN_
+            if(IsDebuggerPresent())
+                acutest_no_exec_ = 1;
+#endif
+#ifdef ACUTEST_LINUX_
+            if(acutest_is_tracer_present_())
+                acutest_no_exec_ = 1;
+#endif
+#ifdef ACUTEST_MACOS_
+            if(acutest_AmIBeingDebugged())
+                acutest_no_exec_ = 1;
+#endif
+#ifdef RUNNING_ON_VALGRIND
+            /* RUNNING_ON_VALGRIND is provided by optionally included <valgrind.h> */
+            if(RUNNING_ON_VALGRIND)
+                acutest_no_exec_ = 1;
+#endif
+        }
+    }
+
+    if(acutest_tap_) {
+        /* TAP requires we know test result ("ok", "not ok") before we output
+         * anything about the test, and this gets problematic for larger verbose
+         * levels. */
+        if(acutest_verbose_level_ > 2)
+            acutest_verbose_level_ = 2;
+
+        /* TAP harness should provide some summary. */
+        acutest_no_summary_ = 1;
+
+        if(!acutest_worker_)
+            printf("1..%d\n", (int) acutest_count_);
+    }
+
+    int index = acutest_worker_index_;
+    for(i = 0; acutest_list_[i].func != NULL; i++) {
+        int run = (acutest_test_data_[i].flags & ACUTEST_FLAG_RUN_);
+        if (acutest_skip_mode_) /* Run all tests except those listed.
*/ + run = !run; + if(run) + acutest_run_(&acutest_list_[i], index++, i); + } + + /* Write a summary */ + if(!acutest_no_summary_ && acutest_verbose_level_ >= 1) { + if(acutest_verbose_level_ >= 3) { + acutest_colored_printf_(ACUTEST_COLOR_DEFAULT_INTENSIVE_, "Summary:\n"); + + printf(" Count of all unit tests: %4d\n", (int) acutest_list_size_); + printf(" Count of run unit tests: %4d\n", acutest_stat_run_units_); + printf(" Count of failed unit tests: %4d\n", acutest_stat_failed_units_); + printf(" Count of skipped unit tests: %4d\n", (int) acutest_list_size_ - acutest_stat_run_units_); + } + + if(acutest_stat_failed_units_ == 0) { + acutest_colored_printf_(ACUTEST_COLOR_GREEN_INTENSIVE_, "SUCCESS:"); + printf(" All unit tests have passed.\n"); + } else { + acutest_colored_printf_(ACUTEST_COLOR_RED_INTENSIVE_, "FAILED:"); + printf(" %d of %d unit tests %s failed.\n", + acutest_stat_failed_units_, acutest_stat_run_units_, + (acutest_stat_failed_units_ == 1) ? "has" : "have"); + } + + if(acutest_verbose_level_ >= 3) + printf("\n"); + } + + if (acutest_xml_output_) { +#if defined ACUTEST_UNIX_ + char *suite_name = basename(argv[0]); +#elif defined ACUTEST_WIN_ + char suite_name[_MAX_FNAME]; + _splitpath(argv[0], NULL, NULL, suite_name, NULL); +#else + const char *suite_name = argv[0]; +#endif + fprintf(acutest_xml_output_, "\n"); + fprintf(acutest_xml_output_, "\n", + suite_name, (int)acutest_list_size_, acutest_stat_failed_units_, acutest_stat_failed_units_, + (int)acutest_list_size_ - acutest_stat_run_units_); + for(i = 0; acutest_list_[i].func != NULL; i++) { + struct acutest_test_data_ *details = &acutest_test_data_[i]; + fprintf(acutest_xml_output_, " \n", acutest_list_[i].name, details->duration); + if (details->flags & ACUTEST_FLAG_FAILURE_) + fprintf(acutest_xml_output_, " \n"); + if (!(details->flags & ACUTEST_FLAG_FAILURE_) && !(details->flags & ACUTEST_FLAG_SUCCESS_)) + fprintf(acutest_xml_output_, " \n"); + fprintf(acutest_xml_output_, " \n"); + } + 
fprintf(acutest_xml_output_, "\n"); + fclose(acutest_xml_output_); + } + + acutest_cleanup_(); + + return (acutest_stat_failed_units_ == 0) ? 0 : 1; +} + + +#endif /* #ifndef TEST_NO_MAIN */ + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* #ifndef ACUTEST_H */ diff --git a/test/trie/fuzzy.h b/lsm/test/trie/fuzzy.h similarity index 73% rename from test/trie/fuzzy.h rename to lsm/test/trie/fuzzy.h index 00dc386..40850e9 100644 --- a/test/trie/fuzzy.h +++ b/lsm/test/trie/fuzzy.h @@ -1,11 +1,13 @@ -#ifndef AD3_FUZZYTEST -#define AD3_FUZZYTEST +#ifndef LSM_TRIE_FUZZY_TEST +#define LSM_TRIE_FUZZY_TEST #include #include #include #include -#include "trie.h" + +#include "lsm/trie.h" +#include "lsm/str_internal.h" typedef struct fuzzyconfig { int seed; @@ -38,7 +40,7 @@ void random_string(char* s, int len) { for (int i = 1; i < len - 1; i++) { val = rand(); - s[i] = (char)(val % 256); + s[i] = (char)(val % 255 + 1); } // Just in case no null characters were created @@ -61,6 +63,23 @@ char** init_string_matrix(int count, int len) { return matrix; } +lsm_str *lsm_random_string_matrix(int count, int max_len) { + lsm_str *matrix = calloc(count, sizeof(lsm_str)); + + for (int i = 0; i < count; i++) { + int len = rand() % max_len; + char *buf = malloc(len * sizeof(char)); + + for (int i = 0; i < len; i++) { + buf[i] = (char)(rand() % 255 + 1); + } + + lsm_str_overwrite(&matrix[i], buf); + } + + return matrix; +} + /** * Test a given trie implementation using randomly generated strings generated * using a given seed. 
@@ -78,8 +97,7 @@ char** init_string_matrix(int count, int len) { int fuzzy_test_trie_seed(FuzzyConfig conf) { srand(conf.seed); - char** matrix = init_string_matrix(conf.word_count, conf.word_length); - random_string_matrix(matrix, conf.word_count, conf.word_length); + lsm_str *matrix = lsm_random_string_matrix(conf.word_count, conf.word_length); bool* contains = calloc(conf.word_count, sizeof(bool)); // It's possible that the string matrix contains duplicate strings @@ -87,11 +105,11 @@ int fuzzy_test_trie_seed(FuzzyConfig conf) { for (int i = 0; i < conf.word_count; i++) { if (contains_dedupped[i] == NULL) { - contains_dedupped[i] = contains + i; + contains_dedupped[i] = &contains[i]; for (int j = i + 1; j < conf.word_count; j++) { - if (strcmp(matrix[i], matrix[j]) == 0) { - contains_dedupped[j] = contains + i; + if (lsm_str_eq(&matrix[i], &matrix[j])) { + contains_dedupped[j] = &contains[i]; } } } @@ -101,11 +119,10 @@ int fuzzy_test_trie_seed(FuzzyConfig conf) { // also correct size_t size = 0; - Trie *ct; - trie_init(&ct, NULL); + lsm_trie *trie; + lsm_trie_init(&trie); - bool changed; - TrieExitCode status; + lsm_error res; // 0: success // 1: invalid add @@ -116,13 +133,13 @@ int fuzzy_test_trie_seed(FuzzyConfig conf) { // Add all strings to trie, checking for duplicates for (int i = 0; i < conf.word_count; i++) { - status = trie_add(ct, matrix[i], NULL); + res = lsm_trie_insert(trie, &matrix[i], (void **)1); // if changed is false, *contains_dedupped[i] should be true, as changed // can only be false if the string is already contained in the trie. if // changed is true, *contains_dedupped[i] should be false, as the string // cannot be in the trie yet. 
- if (status == Ok && *contains_dedupped[i]) { + if (res == lsm_error_ok && *contains_dedupped[i]) { exit_code = 1; goto END; } @@ -134,43 +151,44 @@ int fuzzy_test_trie_seed(FuzzyConfig conf) { } // Ensure size is correct - if (trie_size(ct) != size) { - printf("%i %i\n", trie_size(ct), size); + if (lsm_trie_size(trie) != size) { + printf("%lu %lu\n", lsm_trie_size(trie), size); exit_code = 3; goto END; } // Remove all strings again, again taking duplicates into consideration - /* for (int i = 0; i < conf.word_count; i++) { */ - /* changed = remove_func(ct, matrix[i]); */ + for (int i = 0; i < conf.word_count; i++) { + res = lsm_trie_remove(NULL, trie, &matrix[i]); - /* // The string shouldn't be in the trie, yet another add operation */ - /* // says it added it as well */ - /* if (changed != *contains_dedupped[i]) { */ - /* exit_code = 2; */ - /* goto END; */ - /* } */ + // The string shouldn't be in the trie, yet another add operation + // says it added it as well + if (res == lsm_error_ok && !*contains_dedupped[i]) { + exit_code = 2; + goto END; + } - /* if (*contains_dedupped[i]) { */ - /* *contains_dedupped[i] = false; */ - /* size--; */ - /* } */ - /* } */ + if (*contains_dedupped[i]) { + *contains_dedupped[i] = false; + size--; + } + } // Finally, check that the trie is completely empty - /* if (size_func(ct) != 0) { */ - /* exit_code = 4; */ - /* } */ + if (lsm_trie_size(trie) != 0) { + printf("%lu %lu\n", lsm_trie_size(trie), size); + exit_code = 4; + } END: - trie_free(ct); + /* trie_free(ct); */ // Even testing functions should properly free memory free(contains); free(contains_dedupped); for (int i = 0; i < conf.word_count; i++) { - free(matrix[i]); + lsm_str_zero(&matrix[i]); } free(matrix); @@ -201,5 +219,4 @@ END: /* return -1; */ /* } */ - #endif diff --git a/lsm/test/trie/trie.c b/lsm/test/trie/trie.c new file mode 100644 index 0000000..db3e6ee --- /dev/null +++ b/lsm/test/trie/trie.c @@ -0,0 +1,32 @@ +#include "test.h" +#include "lsm.h" +#include 
"lsm/trie_internal.h" + +#define TRIE_INIT() \ + lsm_trie *trie; \ + TEST_CHECK(lsm_trie_init(&trie) == lsm_error_ok); \ + TEST_CHECK(trie != NULL) + +void test_init() { + TRIE_INIT(); + /* lsm_trie_free(trie); */ +} + +void test_insert_one() { + TRIE_INIT(); + + lsm_str *s; + lsm_str_init_copy(&s, "hello"); + TEST_CHECK(lsm_trie_insert(trie, s, (void *)1) == lsm_error_ok); + TEST_CHECK(lsm_trie_insert(trie, s, (void *)1) == lsm_error_already_present); + + void *data; + TEST_CHECK(lsm_trie_search(&data, trie, s) == lsm_error_ok); + TEST_CHECK(data == (void *)1); +} + +TEST_LIST = { + { "trie init", test_init }, + { "trie insert one", test_insert_one }, + { NULL, NULL } +}; diff --git a/lsm/test/trie/trie_fuzzy.c b/lsm/test/trie/trie_fuzzy.c new file mode 100644 index 0000000..9462c27 --- /dev/null +++ b/lsm/test/trie/trie_fuzzy.c @@ -0,0 +1,35 @@ +#include "test.h" +#include "lsm.h" +#include "lsm/trie_internal.h" +#include "fuzzy.h" + +void test_fuzzy() { + // Randomize seed + srand(time(NULL)); + + FuzzyConfig config; + int counter = 0; + int res; + + for (int len = 1; len < 25; len += 5) { + for (int count = 10; count <= 500; count += 10) { + for (int i = 0; i < 10; i++) { + counter++; + + config.seed = rand(); + config.word_length = len; + config.word_count = count; + + res = fuzzy_test_trie_seed(config); + TEST_CHECK_(res == 0, + "Failed config, seed = %i, len = %i, count = %i, code = %i", config.seed, config.word_length, config.word_count, res); + } + } + } + TEST_MSG("fuzzy tests done = %i", counter); +} + +TEST_LIST = { + { "trie fuzzy", test_fuzzy }, + { NULL, NULL} +}; diff --git a/src/event_loop/event_loop.c b/src/event_loop/event_loop.c index 53317bc..a01ca37 100644 --- a/src/event_loop/event_loop.c +++ b/src/event_loop/event_loop.c @@ -13,21 +13,19 @@ #include "event_loop.h" #include "log.h" -static int event_loop_fd_set_nb(int fd) { +static void event_loop_fd_set_nb(int fd) { int flags = fcntl(fd, F_GETFL); flags |= O_NONBLOCK; fcntl(fd, F_SETFL, 
flags); - - return 0; } event_loop *event_loop_init() { event_loop *el = calloc(sizeof(event_loop), 1); // No idea if this is a good starter value - el->connections = calloc(sizeof(event_loop_conn), 16); + el->connections = calloc(sizeof(event_loop_conn *), 16); el->connection_count = 16; return el; @@ -36,7 +34,7 @@ event_loop *event_loop_init() { int event_loop_put(event_loop *el, event_loop_conn *conn) { if ((size_t)conn->fd >= el->connection_count) { event_loop_conn **resized = - realloc(el->connections, sizeof(event_loop_conn) * (conn->fd + 1)); + realloc(el->connections, sizeof(event_loop_conn *) * (conn->fd + 1)); if (resized == NULL) { return -1; @@ -61,13 +59,7 @@ int event_loop_accept(event_loop *el, int fd) { } // set the new connection fd to nonblocking mode - int res = event_loop_fd_set_nb(connfd); - - if (res < 0) { - close(connfd); - - return -2; - } + event_loop_fd_set_nb(connfd); // creating the struct Conn event_loop_conn *conn = event_loop_conn_init(el); @@ -82,7 +74,7 @@ int event_loop_accept(event_loop *el, int fd) { conn->fd = connfd; conn->state = event_loop_conn_state_req; - res = event_loop_put(el, conn); + int res = event_loop_put(el, conn); if (res != 0) { close(connfd); @@ -126,16 +118,10 @@ void event_loop_run(event_loop *el, int port) { } // The listening socket is always poll'ed in non-blocking mode as well - res = event_loop_fd_set_nb(fd); - - if (res != 0) { - critical(1, "Failed to set listening socket to non-blocking, errno: %i", - errno); - } + event_loop_fd_set_nb(fd); // TODO don't hardcode the number 32 struct pollfd *poll_args = calloc(sizeof(struct pollfd), 32); - size_t poll_args_count; // for convenience, the listening fd is put in the first position struct pollfd pfd = {fd, POLLIN, 0}; @@ -147,7 +133,7 @@ void event_loop_run(event_loop *el, int port) { info("Starting event loop on port %i", port); while (1) { - poll_args_count = 1; + size_t poll_args_count = 1; // connection fds for (size_t i = 0; i < 
el->connection_count; i++) { @@ -160,7 +146,8 @@ void event_loop_run(event_loop *el, int port) { events = (conn->state == event_loop_conn_state_req) ? POLLIN : POLLOUT; events |= POLLERR; - struct pollfd pfd = {conn->fd, events, 0}; + pfd.fd = conn->fd; + pfd.events = events; poll_args[poll_args_count] = pfd; poll_args_count++; diff --git a/src/http/http_consts.c b/src/http/http_consts.c index 6a6e15c..8aa6f4b 100644 --- a/src/http/http_consts.c +++ b/src/http/http_consts.c @@ -96,7 +96,8 @@ const char *http_status_names[][32] = { const char *http_header_names[] = { "Connection", "Location", - "Content-Type" + "Content-Type", + "Content-Disposition" }; const char *http_mime_type_names[][2] = { diff --git a/src/http_loop/http_loop.c b/src/http_loop/http_loop.c index e5c7d20..cb4289e 100644 --- a/src/http_loop/http_loop.c +++ b/src/http_loop/http_loop.c @@ -4,6 +4,9 @@ #include "http_loop.h" #include "log.h" +const http_step http_default_res_steps[HTTP_LOOP_MAX_STEPS] = { + http_loop_step_write_header, http_loop_step_write_body, NULL}; + bool http_loop_handle_request(event_loop_conn *conn) { // Prevents the request handler function from looping indefinitely without // ever consuming new data @@ -46,26 +49,40 @@ bool http_loop_handle_request(event_loop_conn *conn) { return conn->state == event_loop_conn_state_req; } -event_loop *http_loop_init(http_loop_gctx *gctx) { +event_loop *http_loop_init(http_route *routes, size_t route_count, + void *custom_gctx, void *(*custom_ctx_init)(), + void(custom_ctx_reset)(), void(custom_ctx_free)()) { event_loop *el = event_loop_init(); el->ctx_init = (void *(*)(void *))http_loop_ctx_init; el->ctx_free = (void (*)(void *))http_loop_ctx_free; el->handle_data = http_loop_handle_request; - el->write_data = http_loop_write_response; + el->write_data = http_loop_handle_response; + + http_loop_gctx *gctx = http_loop_gctx_init(); + gctx->c = custom_gctx; + gctx->routes = routes; + gctx->route_count = route_count; + gctx->custom_ctx_init = 
custom_ctx_init; + gctx->custom_ctx_reset = custom_ctx_reset; + gctx->custom_ctx_free = custom_ctx_free; el->gctx = gctx; return el; } +void http_loop_set_api_key(http_loop *hl, const char *api_key) { + http_loop_gctx *gctx = hl->gctx; + gctx->api_key = api_key; +} + void http_loop_run(event_loop *el, int port) { debug("Compiling RegEx routes"); http_loop_gctx *gctx = el->gctx; - http_route *route; for (size_t i = 0; i < gctx->route_count; i++) { - route = &gctx->routes[i]; + http_route *route = &gctx->routes[i]; if (route->type == http_route_regex) { regex_t *r = calloc(sizeof(regex_t), 1); diff --git a/src/http_loop/http_loop_ctx.c b/src/http_loop/http_loop_ctx.c index b820725..8d0db1e 100644 --- a/src/http_loop/http_loop_ctx.c +++ b/src/http_loop/http_loop_ctx.c @@ -12,12 +12,14 @@ http_loop_gctx *http_loop_gctx_init() { http_loop_ctx *http_loop_ctx_init(http_loop_gctx *g) { http_loop_ctx *ctx = calloc(sizeof(http_loop_ctx), 1); ctx->g = g; + ctx->c = g->custom_ctx_init(); return ctx; } void http_loop_ctx_free(http_loop_ctx *ctx) { http_loop_ctx_reset(ctx); + ctx->g->custom_ctx_free(ctx->c); free(ctx); } @@ -45,4 +47,6 @@ void http_loop_ctx_reset(http_loop_ctx *ctx) { ctx->res.status = 0; ctx->res.head_len = 0; ctx->res.head_written = 0; + + ctx->g->custom_ctx_reset(ctx->c); } diff --git a/src/http_loop/http_loop_req.c b/src/http_loop/http_loop_req.c index 09b0ed4..a8cd841 100644 --- a/src/http_loop/http_loop_req.c +++ b/src/http_loop/http_loop_req.c @@ -138,13 +138,12 @@ void http_loop_process_request(event_loop_conn *conn) { ctx->current_step++; } - if (conn->state != event_loop_conn_state_req) { - return; - } - - // If we've reached the end of the list of step functions, we report the - // request as finished by clearing its route - if (ctx->route->steps[ctx->current_step] == NULL) { - http_loop_ctx_reset(ctx); + // Request processing can stop early by switching the connection state + // Either way, we reset the step counter as it will be used by the response 
+ // steps + if ((conn->state != event_loop_conn_state_req) || + (ctx->route->steps[ctx->current_step] == NULL)) { + ctx->current_step = 0; + conn->state = event_loop_conn_state_res; } } diff --git a/src/http_loop/http_loop_res.c b/src/http_loop/http_loop_res.c index d2ae029..b29550f 100644 --- a/src/http_loop/http_loop_res.c +++ b/src/http_loop/http_loop_res.c @@ -1,6 +1,7 @@ #include "http_loop.h" #include "log.h" +// cppcheck-suppress syntaxError static const char *http_response_format = "HTTP/1.1 %i %s\n" "Server: lander/" LANDER_VERSION "\n" "Content-Length: %lu\n"; @@ -48,7 +49,7 @@ void http_loop_init_header(http_response *res) { res->head_len = buf_size + 1; } -void http_loop_write_response(event_loop_conn *conn) { +bool http_loop_step_write_header(event_loop_conn *conn) { http_response *res = &((http_loop_ctx *)conn->ctx)->res; // Create head response @@ -56,43 +57,71 @@ void http_loop_write_response(event_loop_conn *conn) { http_loop_init_header(res); } - // The final iteration marks the end of the response, after which we reset the - // context so a next request can be processed - if (res->head_written == res->head_len && - res->body.expected_len == res->body.len) { - http_loop_ctx_reset(conn->ctx); - conn->state = event_loop_conn_state_req; - return; + // Step has finished its work + if (res->head_written == res->head_len) { + return true; } - if (res->head_written < res->head_len) { - size_t bytes_to_write = MIN(res->head_len - res->head_written, - EVENT_LOOP_BUFFER_SIZE - conn->wbuf_size); - memcpy(&conn->wbuf[conn->wbuf_size], &res->head[res->head_written], + size_t bytes_to_write = MIN(res->head_len - res->head_written, + EVENT_LOOP_BUFFER_SIZE - conn->wbuf_size); + memcpy(&conn->wbuf[conn->wbuf_size], &res->head[res->head_written], + bytes_to_write); + + conn->wbuf_size += bytes_to_write; + res->head_written += bytes_to_write; + + return false; +} + +bool http_loop_step_write_body(event_loop_conn *conn) { + http_response *res = &((http_loop_ctx 
*)conn->ctx)->res; + + if (res->body.expected_len == res->body.len) { + return true; + } + + size_t bytes_to_write = MIN(res->body.expected_len - res->body.len, + EVENT_LOOP_BUFFER_SIZE - conn->wbuf_size); + + size_t bytes_written; + + switch (res->body.type) { + case http_body_buf: + memcpy(&conn->wbuf[conn->wbuf_size], &(res->body.buf)[res->body.len], bytes_to_write); - conn->wbuf_size += bytes_to_write; - res->head_written += bytes_to_write; + res->body.len += bytes_to_write; + break; + case http_body_file: + bytes_written = fread(&conn->wbuf[conn->wbuf_size], sizeof(uint8_t), + bytes_to_write, res->body.file); + conn->wbuf_size += bytes_written; + res->body.len += bytes_written; + break; } - if (res->body.len < res->body.expected_len) { - size_t bytes_to_write = MIN(res->body.expected_len - res->body.len, - EVENT_LOOP_BUFFER_SIZE - conn->wbuf_size); - size_t bytes_written; + return false; +} - switch (res->body.type) { - case http_body_buf: - memcpy(&conn->wbuf[conn->wbuf_size], &(res->body.buf)[res->body.len], - bytes_to_write); - conn->wbuf_size += bytes_to_write; - res->body.len += bytes_to_write; - break; - case http_body_file: - bytes_written = fread(&conn->wbuf[conn->wbuf_size], sizeof(uint8_t), - bytes_to_write, res->body.file); - conn->wbuf_size += bytes_written; - res->body.len += bytes_written; - break; - } +void http_loop_handle_response(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + + // Non-routed requests also need to be processed + const http_step *steps = + ctx->route != NULL ? 
ctx->route->steps_res : http_default_res_steps; + + while ((conn->state == event_loop_conn_state_res) && + (steps[ctx->current_step] != NULL) && steps[ctx->current_step](conn)) { + ctx->current_step++; + } + + // Response processing can stop early be switching the connection state + // After response processing has finished its work, we reset the context to + // prepare for a new request + if ((conn->state != event_loop_conn_state_res) || + (steps[ctx->current_step] == NULL)) { + http_loop_ctx_reset(ctx); + + conn->state = event_loop_conn_state_req; } } diff --git a/src/http_loop/http_loop_steps.c b/src/http_loop/http_loop_steps.c index 474cb72..99c5cce 100644 --- a/src/http_loop/http_loop_steps.c +++ b/src/http_loop/http_loop_steps.c @@ -1,7 +1,21 @@ #include +#include +#include "http_loop.h" #include "lander.h" +// Just a naive pow implementation; might improve later +static uint64_t ipow(uint64_t base, uint64_t power) { + uint64_t res = 1; + + while (power > 0) { + res *= base; + power--; + } + + return res; +} + /* * Converts a string to a number, returning true if the string contained a valid * positive number. 
@@ -16,7 +30,39 @@ static bool string_to_num(size_t *res, const char *s, size_t len) { return false; } - *res += val * (int)pow(10, (len - 1) - i); + *res += (uint64_t)val * ipow(10, (len - 1) - i); + } + + return true; +} + +bool http_loop_step_parse_content_length(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + + for (size_t i = 0; i < ctx->req.num_headers; i++) { + const struct phr_header *header = &ctx->req.headers[i]; + + if (strncmp(header->name, "Content-Length", header->name_len) == 0) { + // If the content length header is present but contains an invalid + // number, we return a bad request error + if (!string_to_num(&ctx->req.body.expected_len, header->value, + header->value_len)) { + ctx->res.status = http_bad_request; + conn->state = event_loop_conn_state_res; + + return true; + } + // The content length was actually 0, so we can instantly return here + else if (ctx->req.body.expected_len == 0) { + return true; + } + } + } + + // A zero here means there's no content length header + if (ctx->req.body.expected_len == 0) { + ctx->res.status = http_length_required; + conn->state = event_loop_conn_state_res; } return true; @@ -27,11 +73,11 @@ static bool string_to_num(size_t *res, const char *s, size_t len) { * if it was successful. If false is returned, the underlying step should * immediately exit. 
*/ -static bool try_parse_content_length(event_loop_conn *conn) { +bool try_parse_content_length(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; for (size_t i = 0; i < ctx->req.num_headers; i++) { - struct phr_header *header = &ctx->req.headers[i]; + const struct phr_header *header = &ctx->req.headers[i]; if (strncmp(header->name, "Content-Length", header->name_len) == 0) { // If the content length header is present but contains an invalid @@ -70,7 +116,7 @@ bool http_loop_step_body_to_buf(event_loop_conn *conn) { } ctx->req.body.type = http_body_buf; - ctx->req.body.buf = malloc(ctx->req.body.expected_len * sizeof(uint8_t)); + ctx->req.body.buf = malloc(ctx->req.body.expected_len * sizeof(char)); ctx->req.body.len = 0; } @@ -111,7 +157,7 @@ bool http_loop_step_auth(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; for (size_t i = 0; i < ctx->req.num_headers; i++) { - struct phr_header *header = &ctx->req.headers[i]; + const struct phr_header *header = &ctx->req.headers[i]; if ((strncmp("X-Api-Key", header->name, header->name_len) == 0) && (strncmp(header->value, ctx->g->api_key, header->value_len) == 0) && diff --git a/src/lander/lander.c b/src/lander/lander.c index 60ee83d..5d1c1fe 100644 --- a/src/lander/lander.c +++ b/src/lander/lander.c @@ -1,24 +1,107 @@ #include +#include +#include "http/types.h" +#include "http_loop.h" #include "lander.h" +#include "lsm/store.h" + +const char lander_key_charset[] = + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; http_route lander_routes[] = { {.type = http_route_literal, .method = http_get, .path = "/", - .steps = {lander_get_index, NULL}}, - {.type = http_route_regex, - .method = http_get, - .path = "^/([^/]+)$", - .steps = {lander_get_entry, NULL}}, - {.type = http_route_regex, - .method = http_post, - .path = "^/s(l?)/([^/]*)$", - .steps = {http_loop_step_auth, http_loop_step_body_to_buf, - lander_post_redirect, NULL}}, + .steps = {lander_get_index, NULL}, + .steps_res = 
{http_loop_step_write_header, http_loop_step_write_body, + NULL}}, + { + .type = http_route_regex, + .method = http_get, + .path = "^/([^/]+)$", + .steps = {lander_get_entry, NULL}, + .steps_res = {http_loop_step_write_header, lander_stream_body_to_client, + NULL}, + }, + { + .type = http_route_regex, + .method = http_delete, + .path = "^/([^/]+)$", + .steps = {http_loop_step_auth, lander_remove_entry, NULL}, + .steps_res = {http_loop_step_write_header, http_loop_step_write_body, + NULL}, + }, + { + .type = http_route_regex, + .method = http_post, + .path = "^/s(l?)/([^/]*)$", + .steps = {http_loop_step_auth, lander_post_redirect, + http_loop_step_body_to_buf, lander_post_redirect_body_to_attr, + NULL}, + .steps_res = {http_loop_step_write_header, http_loop_step_write_body, + NULL}, + }, {.type = http_route_regex, .method = http_post, .path = "^/p(l?)/([^/]*)$", - .steps = {http_loop_step_auth, lander_post_paste, - http_loop_step_body_to_file, http_loop_step_switch_res, NULL}}, + .steps = {http_loop_step_auth, http_loop_step_parse_content_length, + lander_post_paste, lander_stream_body_to_entry, NULL}, + .steps_res = {http_loop_step_write_header, http_loop_step_write_body, + NULL}}, + {.type = http_route_regex, + .method = http_post, + .path = "^/f(l?)/([^/]*)$", + .steps = {http_loop_step_auth, http_loop_step_parse_content_length, + lander_post_file, lander_stream_body_to_entry, NULL}, + .steps_res = {http_loop_step_write_header, http_loop_step_write_body, + NULL}}, }; + +void *lander_gctx_init() { return calloc(1, sizeof(lander_gctx)); } + +void *lander_ctx_init() { return calloc(1, sizeof(lander_ctx)); } + +void lander_ctx_reset(lander_ctx *ctx) { + if (ctx->entry != NULL) { + lsm_entry_close(ctx->entry); + + ctx->entry = NULL; + } +} + +void lander_ctx_free(lander_ctx *ctx) { free(ctx); } + +void lander_header_to_attr(http_loop_ctx *ctx, const char *header_name, + lander_attr_type attr_type) { + lander_ctx *c_ctx = ctx->c; + + for (size_t i = 0; i < 
ctx->req.num_headers; i++) { + const struct phr_header *header = &ctx->req.headers[i]; + + if (strncmp(header->name, header_name, header->name_len) == 0) { + if (header->value_len > 0) { + lsm_str *value; + lsm_str_init_copy_n(&value, (char *)header->value, header->value_len); + + lsm_entry_attr_insert(c_ctx->entry, attr_type, value); + } + + return; + } + } +} + +void lander_attr_to_header(http_loop_ctx *ctx, lander_attr_type attr_type, + http_header header_type) { + lander_ctx *c_ctx = ctx->c; + lsm_str *value; + + if (lsm_entry_attr_get(&value, c_ctx->entry, attr_type) == lsm_error_ok) { + char *buf = malloc(lsm_str_len(value) + 1); + memcpy(buf, lsm_str_ptr(value), lsm_str_len(value)); + buf[lsm_str_len(value)] = '\0'; + + http_res_add_header(&ctx->res, header_type, buf, true); + } +} diff --git a/src/lander/lander_delete.c b/src/lander/lander_delete.c new file mode 100644 index 0000000..e91b6c9 --- /dev/null +++ b/src/lander/lander_delete.c @@ -0,0 +1,29 @@ +#include "lander.h" + +bool lander_remove_entry(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + http_loop_gctx *gctx = ctx->g; + lander_gctx *c_gctx = gctx->c; + + const char *key_s = &ctx->req.path[ctx->req.regex_groups[1].rm_so]; + int key_len = ctx->req.regex_groups[1].rm_eo - ctx->req.regex_groups[1].rm_so; + + lsm_str *key; + lsm_str_init_copy_n(&key, (char *)key_s, key_len); + + switch (lsm_store_open_write(&c_ctx->entry, c_gctx->store, key)) { + case lsm_error_ok: + break; + case lsm_error_not_found: + ctx->res.status = http_not_found; + return true; + default: + ctx->res.status = http_internal_server_error; + return true; + } + + lsm_entry_remove(c_ctx->entry); + + return true; +} diff --git a/src/lander/lander_get.c b/src/lander/lander_get.c index e4ba39b..102c631 100644 --- a/src/lander/lander_get.c +++ b/src/lander/lander_get.c @@ -1,6 +1,11 @@ #include +#include "event_loop.h" +#include "http/res.h" +#include "http/types.h" #include "lander.h" 
+#include "log.h" +#include "lsm/store.h" static const char index_page[] = "\n" @@ -22,30 +27,134 @@ bool lander_get_index(event_loop_conn *conn) { return true; } -bool lander_get_entry(event_loop_conn *conn) { +void lander_get_redirect(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; - const char *key = &ctx->req.path[ctx->req.regex_groups[1].rm_so]; - int key_len = ctx->req.regex_groups[1].rm_eo - ctx->req.regex_groups[1].rm_so; + // For redirects, the URL is stored as an in-memory attribute + lsm_str *url_attr_val; - Entry *entry; - TrieExitCode res = trie_search_len(ctx->g->trie, &entry, key, key_len); + // This shouldn't be able to happen + if (lsm_entry_attr_get(&url_attr_val, c_ctx->entry, lander_attr_type_url) != + lsm_error_ok) { + error("Entry of type redirect detected without URL attribute"); - if (res == NotFound) { - ctx->res.status = http_not_found; - } else if (entry->type == Redirect) { - ctx->res.status = http_moved_permanently; - http_res_add_header(&ctx->res, http_header_location, entry->string, false); - } else if (entry->type == Paste) { - char fname[strlen(ctx->g->data_dir) + 8 + key_len + 1]; - sprintf(fname, "%s/pastes/%.*s", ctx->g->data_dir, key_len, key); + ctx->res.status = http_internal_server_error; + lsm_entry_close(c_ctx->entry); + c_ctx->entry = NULL; - http_res_set_body_file(&ctx->res, fname); - // TODO don't call everything a text file - http_res_set_mime_type(&ctx->res, http_mime_txt); + return; } - conn->state = event_loop_conn_state_res; + char *buf = malloc(lsm_str_len(url_attr_val) + 1); + memcpy(buf, lsm_str_ptr(url_attr_val), lsm_str_len(url_attr_val)); + + buf[lsm_str_len(url_attr_val)] = '\0'; + + ctx->res.status = http_moved_permanently; + http_res_add_header(&ctx->res, http_header_location, buf, true); + + // We no longer need the entry at this point, so we can unlock it early + // This will also signal to the response code not to read any data from + // the entry + 
lsm_entry_close(c_ctx->entry); + c_ctx->entry = NULL; +} + +void lander_get_paste(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + ctx->res.body.expected_len = lsm_entry_data_len(c_ctx->entry); + http_res_set_mime_type(&ctx->res, http_mime_txt); +} + +void lander_get_file(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + ctx->res.body.expected_len = lsm_entry_data_len(c_ctx->entry); + lander_attr_to_header(ctx, lander_attr_type_content_type, + http_header_content_type); + + lsm_str *value; + char *buf; + + if (lsm_entry_attr_get(&value, c_ctx->entry, lander_attr_type_file_name) == + lsm_error_ok) { + buf = malloc(24 + lsm_str_len(value)); + int len = lsm_str_len(value); + sprintf(buf, "attachment; filename=\"%*s\"", len, lsm_str_ptr(value)); + } else { + buf = malloc(11); + strcpy(buf, "attachment"); + } + + http_res_add_header(&ctx->res, http_header_content_disposition, buf, true); +} + +bool lander_get_entry(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + http_loop_gctx *gctx = ctx->g; + lander_gctx *c_gctx = gctx->c; + + const char *key_s = &ctx->req.path[ctx->req.regex_groups[1].rm_so]; + int key_len = ctx->req.regex_groups[1].rm_eo - ctx->req.regex_groups[1].rm_so; + + lsm_str *key; + lsm_str_init_copy_n(&key, (char *)key_s, key_len); + + switch (lsm_store_open_read(&c_ctx->entry, c_gctx->store, key)) { + case lsm_error_ok: + break; + case lsm_error_not_found: + ctx->res.status = http_not_found; + conn->state = event_loop_conn_state_res; + return true; + default: + ctx->res.status = http_internal_server_error; + conn->state = event_loop_conn_state_res; + return true; + } + + lander_entry_type t; + lsm_entry_attr_get_uint8_t((uint8_t *)&t, c_ctx->entry, + lander_attr_type_entry_type); + + switch (t) { + case lander_entry_type_redirect: + lander_get_redirect(conn); + break; + case lander_entry_type_paste: + lander_get_paste(conn); + 
break; + case lander_entry_type_file: + lander_get_file(conn); + break; + } return true; } + +bool lander_stream_body_to_client(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + if ((c_ctx->entry == NULL) || + (ctx->res.body.expected_len == ctx->res.body.len)) { + return true; + } + + uint64_t to_write = MIN(EVENT_LOOP_BUFFER_SIZE - conn->wbuf_size, + ctx->res.body.expected_len - ctx->res.body.len); + + uint64_t read = 0; + lsm_entry_data_read(&read, (char *)&conn->wbuf[conn->wbuf_size], c_ctx->entry, + to_write); + + ctx->res.body.len += read; + conn->wbuf_size += read; + + return false; +} diff --git a/src/lander/lander_post.c b/src/lander/lander_post.c index f1b686c..9711d03 100644 --- a/src/lander/lander_post.c +++ b/src/lander/lander_post.c @@ -1,116 +1,128 @@ #include "http/res.h" +#include "http/types.h" #include "lander.h" #include "log.h" +#include "lsm/store.h" -// TODO entry leaks if key is already present -static bool add_entry(char **key_ptr, int *key_len_ptr, http_loop_ctx *ctx, - Entry *entry, bool random) { - // The first match group matches the "long" path - bool secure = - (ctx->req.regex_groups[1].rm_eo - ctx->req.regex_groups[1].rm_so) == 1; +static void randomize_key(char *key, int len) { + size_t charset_len = strlen(lander_key_charset); - char *key; - int key_len = 0; - TrieExitCode res; - - if (random) { - res = trie_add_random(ctx->g->trie, &key, entry, secure); - - if (res == Ok) { - key_len = strlen(key); - } - } else { - key = (char *)&ctx->req.path[ctx->req.regex_groups[2].rm_so]; - key_len = ctx->req.regex_groups[2].rm_eo - ctx->req.regex_groups[2].rm_so; - - res = trie_add_len(ctx->g->trie, key, key_len, entry); + for (int i = 0; i < len; i++) { + key[i] = lander_key_charset[rand() % charset_len]; } - switch (res) { - case Ok: - break; - case AlreadyPresent: + key[len] = '\0'; +} + +/** + * Insert a new entry into the store. 
+ * + * @return true on success, false otherwise + */ +bool lander_insert_entry(http_loop_ctx *ctx) { + http_loop_gctx *gctx = ctx->g; + lander_gctx *c_gctx = gctx->c; + lander_ctx *c_ctx = ctx->c; + + lsm_str *key; + int key_len; + + if (ctx->req.regex_groups[2].rm_eo == ctx->req.regex_groups[2].rm_so) { + // Generate a random key to insert + bool secure = + (ctx->req.regex_groups[1].rm_eo - ctx->req.regex_groups[1].rm_so) == 1; + key_len = secure ? 16 : 4; + char *key_s = malloc((key_len + 1) * sizeof(char)); + + randomize_key(key_s, key_len); + lsm_str_init(&key, key_s); + } else { + const char *key_s = &ctx->req.path[ctx->req.regex_groups[2].rm_so]; + key_len = ctx->req.regex_groups[2].rm_eo - ctx->req.regex_groups[2].rm_so; + + lsm_str_init_copy_n(&key, key_s, key_len); + } + + // TODO free key on error + switch (lsm_store_insert(&c_ctx->entry, c_gctx->store, key)) { + case lsm_error_already_present: ctx->res.status = http_conflict; return false; + case lsm_error_ok: + break; default: ctx->res.status = http_internal_server_error; return false; } - // Add a slash to the key and add it as the location header + // Add location header char *buf = malloc(key_len + 2); - - memcpy(&buf[1], key, key_len); + memcpy(&buf[1], lsm_str_ptr(key), key_len); buf[0] = '/'; buf[key_len + 1] = '\0'; http_res_add_header(&ctx->res, http_header_location, buf, true); ctx->res.status = http_created; - if (key_ptr != NULL) { - *key_ptr = key; - } - - if (key_len_ptr != NULL) { - *key_len_ptr = key_len; - } - return true; } bool lander_post_redirect(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; - bool random = - ctx->req.regex_groups[2].rm_eo == ctx->req.regex_groups[2].rm_so; + lander_ctx *c_ctx = ctx->c; - // Allocate a new buffer to pass to the trie - char *url = malloc(ctx->req.body.len + 1); - memcpy(url, ctx->req.body.buf, ctx->req.body.len); - url[ctx->req.body.len] = '\0'; - - Entry *new_entry = entry_new(Redirect, url); - - // The entry duplicates the string - 
free(url); - - // We don't check the result here, because we would perform the same action - // either way - char *key; - add_entry(&key, NULL, ctx, new_entry, random); - - if (random) { - free(key); + if (!lander_insert_entry(ctx)) { + conn->state = event_loop_conn_state_res; + return true; } - conn->state = event_loop_conn_state_res; + lsm_entry_attr_insert_uint8_t(c_ctx->entry, lander_attr_type_entry_type, + lander_entry_type_redirect); + + return true; +} + +bool lander_post_redirect_body_to_attr(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + lsm_str *attr_value; + lsm_str_init_copy_n(&attr_value, ctx->req.body.buf, ctx->req.body.len); + lsm_entry_attr_insert(c_ctx->entry, lander_attr_type_url, attr_value); return true; } bool lander_post_paste(event_loop_conn *conn) { http_loop_ctx *ctx = conn->ctx; - bool random = - ctx->req.regex_groups[2].rm_eo == ctx->req.regex_groups[2].rm_so; + lander_ctx *c_ctx = ctx->c; - char *key; - int key_len; - Entry *new_entry = entry_new(Paste, ""); - - if (!add_entry(&key, &key_len, ctx, new_entry, random)) { + if (!lander_insert_entry(ctx)) { conn->state = event_loop_conn_state_res; - return true; } - char *fname = malloc(strlen(ctx->g->data_dir) + 8 + key_len + 1); - sprintf(fname, "%s/pastes/%.*s", ctx->g->data_dir, key_len, key); - - ctx->req.body.fname = fname; - ctx->req.body.fname_owned = true; - - if (random) { - free(key); - } + lsm_entry_attr_insert_uint8_t(c_ctx->entry, lander_attr_type_entry_type, + lander_entry_type_paste); + lander_header_to_attr(ctx, "X-Lander-Filename", lander_attr_type_file_name); + + return true; +} + +bool lander_post_file(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + if (!lander_insert_entry(ctx)) { + conn->state = event_loop_conn_state_res; + return true; + } + + lsm_entry_attr_insert_uint8_t(c_ctx->entry, lander_attr_type_entry_type, + lander_entry_type_file); + lander_header_to_attr(ctx, 
"X-Lander-Content-Type", + lander_attr_type_content_type); + lander_header_to_attr(ctx, "X-Lander-Filename", lander_attr_type_file_name); return true; } diff --git a/src/lander/lander_steps.c b/src/lander/lander_steps.c new file mode 100644 index 0000000..7804df5 --- /dev/null +++ b/src/lander/lander_steps.c @@ -0,0 +1,22 @@ +#include + +#include "lander.h" + +bool lander_stream_body_to_entry(event_loop_conn *conn) { + http_loop_ctx *ctx = conn->ctx; + lander_ctx *c_ctx = ctx->c; + + uint64_t to_append = + MIN(conn->rbuf_size - conn->rbuf_read, + ctx->req.body.expected_len - lsm_entry_data_len(c_ctx->entry)); + + lsm_str *data; + lsm_str_init_copy_n(&data, (char *)&conn->rbuf[conn->rbuf_read], to_append); + lsm_entry_data_append(c_ctx->entry, data); + + conn->rbuf_read += to_append; + + lsm_str_free(data); + + return lsm_entry_data_len(c_ctx->entry) == ctx->req.body.expected_len; +} diff --git a/src/main.c b/src/main.c index fa9d95e..2f52fc1 100644 --- a/src/main.c +++ b/src/main.c @@ -1,4 +1,6 @@ #include +#include +#include #include "lander.h" #include "log.h" @@ -7,51 +9,47 @@ const char *var = getenv(env_var); \ if (var == NULL) { \ critical(1, "Missing environment variable %s", env_var); \ - } \ - var = strdup(var); + } #define ENV_OPT(var, env_var, default) \ const char *var = getenv(env_var); \ if (var == NULL) { \ - var = strdup(default); \ - } else { \ - var = strdup(var); \ + var = default; \ } int main() { setvbuf(stdout, NULL, _IONBF, 0); + srand(time(NULL)); ENV(api_key, "LANDER_API_KEY"); ENV_OPT(port_str, "LANDER_PORT", "18080"); - ENV_OPT(data_dir, "LANDER_DATA_DIR", "."); + ENV_OPT(data_dir_s, "LANDER_DATA_DIR", "."); int port = atoi(port_str); - if (port <= 0 || port >= 2 << 16) { + if (port <= 0 || port >= 1 << 16) { critical(1, "Invalid TCP port %s", port_str); } - char file_path[strlen(data_dir) + 12 + 1]; - sprintf(file_path, "%s/lander.data", data_dir); + lander_gctx *c_gctx = lander_gctx_init(); + c_gctx->data_dir = data_dir_s; - 
info("Initializing trie from file '%s'", file_path); + lsm_str *data_dir; + lsm_str_init_copy(&data_dir, (char *)data_dir_s); - Trie *trie; - TrieExitCode res = trie_init(&trie, file_path); + info("Initializing store from path '%s'", data_dir_s); - if (res != Ok) { - critical(1, "An error occured while populating the trie."); + if (lsm_store_load(&c_gctx->store, data_dir) != lsm_error_ok) { + critical(2, "Failed to load existing store."); } - info("Trie initialized and populated with %i entries", trie_size(trie)); + info("Store loaded containing %lu entries", lsm_store_size(c_gctx->store)); - http_loop_gctx *gctx = http_loop_gctx_init(); - gctx->trie = trie; - gctx->routes = lander_routes; - gctx->route_count = sizeof(lander_routes) / sizeof(lander_routes[0]); - gctx->api_key = api_key; - gctx->data_dir = data_dir; - event_loop *el = http_loop_init(gctx); + http_loop *hl = http_loop_init( + lander_routes, sizeof(lander_routes) / sizeof(lander_routes[0]), c_gctx, + lander_ctx_init, (void (*)(void *))lander_ctx_reset, + (void (*)(void *))lander_ctx_free); + http_loop_set_api_key(hl, api_key); - http_loop_run(el, port); + http_loop_run(hl, port); } diff --git a/src/main.cpp b/src/main.cpp deleted file mode 100644 index a36505f..0000000 --- a/src/main.cpp +++ /dev/null @@ -1,228 +0,0 @@ -#include -#include - -#include "crow.h" - -extern "C" { -#include "trie.h" -} - -static const std::string file_path = "lander.data"; -static const std::string index_page = R"( - - - -

r8r.be

-

This is the URL shortener and pastebin accompanying my site, The Rusty Bever.

- - -)"; - -#define ENV(var, env_var) \ - const char *_##var = getenv(env_var); \ - if (_##var == NULL) { \ - printf("Missing environment variable %s.\n", env_var); \ - return 1; \ - } \ - const std::string var = std::string(_##var); - -#define AUTH() \ - std::string provided_api_key = req.get_header_value("X-Api-Key"); \ - if (api_key.compare(provided_api_key) != 0) { \ - return crow::response(crow::status::UNAUTHORIZED); \ - } - -crow::response add_redirect(std::string base_url, Trie *trie, const char *url, - bool secure) { - Entry *new_entry = entry_new(Redirect, url); - - // The key already gets copied into the trie, so this pointer is safe to use - // ever after unlocking the trie - trie_wlock(trie); - char *key; - TrieExitCode res = trie_add_random(trie, &key, new_entry, secure); - trie_unlock(trie); - - if (res != Ok) { - return crow::response(crow::status::INTERNAL_SERVER_ERROR); - } - - std::string out = base_url + key; - free(key); - - return crow::response(out); -} - -bool store_paste(const char *key, const char *body) { - // Write paste contents to file - std::fstream file; - file.open(std::string("pastes/") + key, std::ios_base::out); - - if (!file.is_open()) { - return false; - } - - file << body; - file.close(); - - return true; -} - -crow::response add_paste(std::string base_url, Trie *trie, const char *body, - bool secure) { - Entry *new_entry = entry_new(Paste, ""); - - trie_wlock(trie); - char *key; - TrieExitCode res = trie_add_random(trie, &key, new_entry, secure); - trie_unlock(trie); - - if (res != Ok) { - return crow::response(crow::status::INTERNAL_SERVER_ERROR); - } - - if (!store_paste(key, body)) { - return crow::response(crow::status::INTERNAL_SERVER_ERROR); - } - - std::string out = base_url + key; - free(key); - - return crow::response(out); -} - -int main() { - // Initialize random seed for generating URLs - srand(time(NULL)); - - ENV(api_key, "LANDER_API_KEY"); - ENV(base_url, "LANDER_BASE_URL"); - - std::cout << "Initializing trie 
from file '" << file_path << "'..." - << std::endl; - - // Initialize trie and populate from data file - Trie *trie; - int res = trie_init(&trie, file_path.c_str()); - - if (res != 0) { - std::cout << "An error occured while initializing the trie." << std::endl; - - exit(1); - } - - std::cout << "Added " << trie_size(trie) << " entries to trie." << std::endl; - - // Create pastes directory if not present - // TODO don't just ignore errors here - mkdir("pastes", 0700); - - crow::SimpleApp app; - app.loglevel(crow::LogLevel::Info); - - CROW_ROUTE(app, "/").methods(crow::HTTPMethod::Get)( - []() { return crow::response("html", index_page); }); - - // Serve an entry - CROW_ROUTE(app, "/") - .methods(crow::HTTPMethod::Get)( - [trie](crow::response &res, std::string key) { - trie_rlock(trie); - Entry *entry; - TrieExitCode status = trie_search(trie, &entry, key.c_str()); - - if (status == Ok) { - if (entry->type == Redirect) { - res.redirect(entry->string); - } else if (entry->type == Paste) { - res.set_static_file_info("pastes/" + key); - } - } else { - res.code = 404; - } - - res.end(); - trie_unlock(trie); - }); - - // Add a new Redirect with a short randomly generated key - CROW_ROUTE(app, "/s/") - .methods(crow::HTTPMethod::Post)( - [api_key, base_url, trie](const crow::request req) { - AUTH(); - - return add_redirect(base_url, trie, req.body.c_str(), false); - }); - - // Add a new Redirect with a long randomly generated key - CROW_ROUTE(app, "/sl/") - .methods(crow::HTTPMethod::Post)( - [api_key, base_url, trie](const crow::request req) { - AUTH(); - - return add_redirect(base_url, trie, req.body.c_str(), true); - }); - - // Add a new Redirect with a given key - CROW_ROUTE(app, "/s/") - .methods(crow::HTTPMethod::Post)( - [api_key, base_url, trie](const crow::request &req, std::string key) { - AUTH(); - - Entry *new_entry = entry_new(Redirect, req.body.c_str()); - - trie_wlock(trie); - TrieExitCode status = trie_add(trie, key.c_str(), new_entry); - 
trie_unlock(trie); - - switch (status) { - case Ok: - return crow::response(base_url + key); - case AlreadyPresent: - return crow::response(crow::status::CONFLICT); - default: - return crow::response(crow::status::INTERNAL_SERVER_ERROR); - } - }); - - // Add a new Paste with a short randomly generated key - CROW_ROUTE(app, "/p/") - .methods(crow::HTTPMethod::Post)( - [api_key, base_url, trie](const crow::request &req) { - AUTH(); - - return add_paste(base_url, trie, req.body.c_str(), false); - }); - - // Add a new Paste with a long randomly generated key - CROW_ROUTE(app, "/pl/") - .methods(crow::HTTPMethod::Post)( - [api_key, base_url, trie](const crow::request &req) { - AUTH(); - - return add_paste(base_url, trie, req.body.c_str(), true); - }); - - // Add a paste with a given key - CROW_ROUTE(app, "/p/") - .methods(crow::HTTPMethod::Post)( - [api_key, base_url, trie](const crow::request &req, std::string key) { - AUTH(); - - Entry *new_entry = entry_new(Paste, ""); - trie_wlock(trie); - TrieExitCode status = trie_add(trie, key.c_str(), new_entry); - trie_unlock(trie); - - if (status != Ok) { - return crow::response(crow::status::CONFLICT); - } - - if (!store_paste(key.c_str(), req.body.c_str())) { - return crow::response(crow::status::INTERNAL_SERVER_ERROR); - } - - return crow::response(base_url + key); - }); - app.port(18080).multithreaded().run(); -} diff --git a/src/trie/trie.c b/src/trie/trie.c deleted file mode 100644 index b1262a7..0000000 --- a/src/trie/trie.c +++ /dev/null @@ -1,428 +0,0 @@ -#include -#include -#include -#include -#include - -#include "trie.h" -#include "trie_entry.h" -#include "trie_node.h" - -typedef struct ttrie { - TrieNode *root; - size_t size; - char *file_path; - pthread_rwlock_t lock; -} Trie; - -TrieExitCode trie_add_no_lock(Trie *trie, const char *key, Entry *entry); - -/** - * Allocate and initialize an empty Trie - * - * @return pointer to the empty Trie - */ -TrieExitCode trie_init(Trie **trie_ptr, const char *file_path) { - 
// Allocate & initialize trie - Trie *trie = calloc(1, sizeof(Trie)); - trie->root = tnode_init(); - pthread_rwlock_init(&trie->lock, NULL); - - if (file_path == NULL) { - trie->file_path = NULL; - *trie_ptr = trie; - return Ok; - } - - trie->file_path = strdup(file_path); - - // Populate trie with data from file - FILE *fp = fopen(file_path, "r"); - - if (fp == NULL) { - return FileError; - } - - // We read in lines of at most 8192 characters (sounds like enough) - char buffer[8192]; - EntryType type; - Entry *entry; - int i, j; - TrieExitCode status; - - while (fgets(buffer, 8192, fp)) { - i = 0; - - // Move index in buffer until we encounter first space character - while (buffer[i] != ' ') { - i++; - } - - // Split the buffer into two strings, the key and the payload - buffer[i] = '\0'; - - type = entry_type_from_char(buffer[i + 1]); - - // Skip type character & its surrounding spaces - j = i + 3; - - // Now remove the newline character - while (buffer[j] != '\n') { - j++; - } - - buffer[j] = '\0'; - - entry = entry_new(type, buffer + i + 3); - status = trie_add_no_lock(trie, buffer, entry); - - if (status != Ok) { - trie_free(trie); - return status; - } - } - - fclose(fp); - - *trie_ptr = trie; - - return Ok; -} - -/** - * De-allocate a TernaryTree by freeing its entire underlying structure. - * - * @param trie trie to free - */ -void trie_free(Trie *trie) { - tnode_free(trie->root); - free(trie); -} - -typedef struct searchresult { - TrieNode *parent; - TrieNode *child; -} SearchResult; - -SearchResult trie_search_node_len(Trie *trie, const char *key, size_t key_len) { - SearchResult out = {NULL, NULL}; - - size_t i = 0; - TrieNode **node_ptr = &(trie->root); - TrieNode **child_ptr; - - do { - child_ptr = tnode_search(*node_ptr, key[i], false); - - // We don't have to check whether *node_ptr is NULL, because if it was - // NULL, it wouldn't be in the binary tree. 
- if (child_ptr == NULL) { - return out; - } - - i++; - - if (memcmp((*child_ptr)->string, key + i, (*child_ptr)->string_len) != 0) { - return out; - } - - i += (*child_ptr)->string_len; - - if (i < key_len) { - node_ptr = child_ptr; - } - } while (i < key_len); - - // At this point, we've either arrived at an empty child, or traversed through - // the entire string. Therefore, all we have to do is check whether we're at - // the end of the string and if node represents a string. - if (i == key_len && (*child_ptr)->represents) { - out.parent = *node_ptr; - out.child = *child_ptr; - } - - return out; -} - -SearchResult trie_search_node(Trie *trie, const char *key) { - return trie_search_node_len(trie, key, strlen(key)); -} - -/** - * Returns whether the given string is present in the trie. - * - * @param trie trie to look in - * @param string string to look up - * @return true if the string is present in the trie, false otherwise - */ -TrieExitCode trie_search_len(Trie *trie, Entry **entry_ptr, const char *key, - size_t key_len) { - SearchResult res = trie_search_node_len(trie, key, key_len); - - if (res.child == NULL) { - return NotFound; - } - - *entry_ptr = res.child->entry; - - return Ok; -} - -TrieExitCode trie_search(Trie *trie, Entry **entry_ptr, const char *key) { - return trie_search_len(trie, entry_ptr, key, strlen(key)); -} - -/** - * Add the given string to the Trie. 
- * - * @param trie trie to add string to - * @param string string to add - * @return true if the string wasn't present in the trie and thus added, false - * otherwise - */ -TrieExitCode trie_add_len_no_lock(Trie *trie, const char *key, size_t key_len, - Entry *entry) { - size_t i = 0; - uint8_t offset; - TrieNode **node_ptr = &(trie->root); - TrieNode **child_node_ptr; - TrieNode *child_node; - - do { - offset = 0; - child_node_ptr = tnode_search(*node_ptr, key[i], true); - - i++; - - // We've reached a NULL child, so we add the remaining part of the string - // here - if (*child_node_ptr == NULL) { - child_node = tnode_init(); - - while (offset < TRIE_MAX_SKIP_SIZE && i + offset < key_len) { - offset++; - } - - memcpy(child_node->string, key + i, offset); - - child_node->string_len = offset; - *child_node_ptr = child_node; - - // If the remaining part of the string is still longer than the maximum - // allowed skip length, we continue through the loop. The next iteration - // will enter this if statement again, and perform the same loop, until - // the string is fully added to the trie. 
- if (i + offset < key_len) { - node_ptr = child_node_ptr; - i += offset; - - continue; - } - - child_node->represents = true; - child_node->entry = entry; - - trie->size++; - return Ok; - } - - while (offset < (*child_node_ptr)->string_len) { - // String no longer aligns with edge, so we have to split - if (key[i + offset] != (*child_node_ptr)->string[offset]) { - TrieNode *split_node = tnode_init(); - child_node = *child_node_ptr; - - // New string of the split node is the prefix that we were able - // to skip - if (offset > 0) { - memcpy(split_node->string, child_node->string, offset); - split_node->string_len = offset; - } - - // split_node replaces child_node as the child of node - *child_node_ptr = split_node; - TrieNode **new_node_ptr = - tnode_search(split_node, child_node->string[offset], true); - *new_node_ptr = child_node; - - // child_node has now become a child of split_node, so we update its - // string accordingely by removing the skipped prefix + the one - // character that's already stored by being a child of split_node - /* char *old_string = child_node->string.ptr; */ - uint8_t new_skip_len = child_node->string_len - (offset + 1); - - if (new_skip_len > 0) { - char old_string[TRIE_MAX_SKIP_SIZE]; - memcpy(old_string, child_node->string + offset + 1, new_skip_len); - memcpy(child_node->string, old_string, new_skip_len); - } - - child_node->string_len = new_skip_len; - - // The while loop will exit either way after this has happened, as - // child_node is now split_node and split_node's len is already set to - // offset. 
- break; - } - - offset++; - } - - node_ptr = child_node_ptr; - - i += offset; - } while (i < key_len); - - if ((*child_node_ptr)->represents) { - return AlreadyPresent; - } - - (*child_node_ptr)->represents = true; - (*child_node_ptr)->entry = entry; - trie->size++; - return Ok; -} - -TrieExitCode trie_add_no_lock(Trie *trie, const char *key, Entry *entry) { - return trie_add_len_no_lock(trie, key, strlen(key), entry); -} - -TrieExitCode trie_add_len(Trie *trie, const char *key, size_t key_len, - Entry *entry) { - if (trie->file_path != NULL) { - // Easiest way to make sure we don't add duplicate entries - // We use an internal function that doesn't require a read lock, as we're - // already inside a write lock - if (trie_search_node_len(trie, key, key_len).child != NULL) { - return AlreadyPresent; - } - - FILE *fp = fopen(trie->file_path, "a"); - - if (fp == NULL) { - return FileError; - } - - fputs(key, fp); - fputs(" ", fp); - fputc(entry_type_to_char(entry->type), fp); - fputs(" ", fp); - fputs(entry->string, fp); - fputs("\n", fp); - - fclose(fp); - } - - // This function *should* always return Ok. Otherwise, the function would've - // exited because the string was found in the trie. - return trie_add_len_no_lock(trie, key, key_len, entry); -} - -TrieExitCode trie_add(Trie *trie, const char *key, Entry *entry) { - return trie_add_len(trie, key, strlen(key), entry); -} - -TrieExitCode trie_add_random(Trie *trie, char **key_ptr, Entry *entry, - bool secure) { - // Generate random key - bool ok = false; - int key_length = secure ? RANDOM_KEY_LENGTH_LONG : RANDOM_KEY_LENGTH_SHORT; - char *key = malloc(key_length + 1); - key[key_length] = '\0'; - - // We naively generate new keys until we find a key that isn't in the trie - // yet. With charset_len ** RANDOM_KEY_LENGTH sufficiently large, this isn't a - // problem, because the chances of collisions are extremely small. 
- while (!ok) { - for (int i = 0; i < key_length; i++) { - key[i] = charset[rand() % charset_len]; - } - - ok = trie_search_node(trie, key).child == NULL; - } - - TrieExitCode return_value = trie_add(trie, key, entry); - - if (return_value == Ok) { - *key_ptr = key; - } else { - free(key); - } - - return return_value; -} - -/** - * Remove the given string from a Trie. - * - * @param trie trie to remove string from - * @param string string to remove - * @return true if the string was in the trie and thus removed, false otherwise - */ -/* bool trie_remove(Trie *trie, const char *string) { */ -/* pthread_rwlock_wrlock(&trie->lock); */ - -/* bool return_value = false; */ - -/* SearchResult res = trie_search_node(trie, string); */ - -/* if (res.child == NULL) { */ -/* goto end; */ -/* } */ - -/* trie->size--; */ -/* return_value = true; */ - -/* if (res.parent != NULL) { */ -/* // We're removing a full leaf, so we calculate the offset of the - * character */ -/* // to remove from the parent */ -/* if (res.child->type == 2) { */ -/* size_t str_len = strlen(string); */ -/* size_t suffix_len = strlen(res.child->ptr.string); */ - -/* tnode_remove(res.parent, string[str_len - suffix_len - 1]); */ -/* } */ -/* // In the other case, the character to remove from the parent is the last - */ -/* // character of the string */ -/* else if (res.child->size == 0) { */ -/* size_t i = 0; */ - -/* while (string[i + 1] != DELIMITER) { */ -/* i++; */ -/* } */ - -/* tnode_remove(res.parent, string[i]); */ -/* } else { */ -/* res.child->type = 0; */ - -/* goto end; */ -/* } */ - -/* tnode_free(res.child); */ -/* } */ -/* // We're in the root here */ -/* else { */ -/* res.child->type = 0; */ -/* } */ - -/* end: */ -/* pthread_rwlock_unlock(&trie->lock); */ - -/* return return_value; */ -/* } */ - -/** - * Return the current size of the given trie. 
- * - * @param trie trie to return size for - * @return size of the trie - */ -size_t trie_size(Trie *trie) { return trie->size; } - -int trie_rlock(Trie *trie) { return pthread_rwlock_rdlock(&trie->lock); } - -int trie_wlock(Trie *trie) { return pthread_rwlock_wrlock(&trie->lock); } - -int trie_unlock(Trie *trie) { return pthread_rwlock_unlock(&trie->lock); } diff --git a/src/trie/trie_entry.c b/src/trie/trie_entry.c deleted file mode 100644 index 2aa8cdf..0000000 --- a/src/trie/trie_entry.c +++ /dev/null @@ -1,37 +0,0 @@ -#include "trie_entry.h" -#include - -EntryType entry_type_from_char(char c) { - switch (c) { - case '0': - return Redirect; - case '1': - return Paste; - default: - return Unknown; - } -} - -char entry_type_to_char(EntryType et) { - switch (et) { - case Redirect: - return '0'; - case Paste: - return '1'; - default: - return '\0'; - } -} - -Entry *entry_new(EntryType type, const char *string) { - Entry *entry = malloc(sizeof(Entry)); - entry->type = type; - - if (string != NULL) { - entry->string = strdup(string); - } else { - entry->string = NULL; - } - - return entry; -} diff --git a/src/trie/trie_entry.h b/src/trie/trie_entry.h deleted file mode 100644 index b3ca8cd..0000000 --- a/src/trie/trie_entry.h +++ /dev/null @@ -1,7 +0,0 @@ -#include "trie.h" - -EntryType entry_type_from_char(char c); - -char entry_type_to_char(EntryType et); - -Entry *entry_new(EntryType type, const char *string); diff --git a/src/trie/trie_node.c b/src/trie/trie_node.c deleted file mode 100644 index 0da5163..0000000 --- a/src/trie/trie_node.c +++ /dev/null @@ -1,265 +0,0 @@ -#include -#include -#include - -#include "trie_node.h" - -/** - * Allocate and initialize a new TrieInnerNode representing a given - * character. 
- * - * @param c character to represent - * @return pointer to newly allocated struct - */ -TrieInnerNode *tinode_init(char c) { - TrieInnerNode *node = calloc(1, sizeof(TrieInnerNode)); - node->key = c; - - return node; -} - -/** - * Allocate and initialize a new TrieNode. - * - * @return pointer to newly allocated struct - */ -TrieNode *tnode_init() { - TrieNode *node = malloc(sizeof(TrieNode)); - - node->tree_size = 0; - node->string_len = 0; - node->represents = false; - - return node; -} - -/** - * Free a TrieInnerNode and its underlying tree structure. This should - * usually only be called on the root of a binary tree to free the entire - * structure. - * - * @param node node whose tree to free - */ -void tinode_free_cascade(TrieInnerNode *node) { - if (node->left != NULL) { - tinode_free_cascade(node->left); - } - - if (node->right != NULL) { - tinode_free_cascade(node->right); - } - - if (node->next != NULL) { - tnode_free(node->next); - } - - free(node); -} - -/** - * Free a TrieNode and its underlying tree structure. - * - * @param node node to free - */ -void tnode_free(TrieNode *node) { - if (node->tree_size > 0) { - tinode_free_cascade(node->tree); - } - - // TODO properly free entry - /* if (node->payload != NULL) { */ - /* free(node->payload); */ - /* } */ - - free(node); -} - -/** - * This function performs a lookup in the underlying binary tree of the given - * TrieNode. If found, the return value is a pointer to the memory - * location where the TrieInnerNode representing the given character - * stores its `next` field. If not found, the return value is NULL, unless - * `create` is true. - * - * NOTE: a non-NULL return value does not mean that the dereferenced value is - * also not NULL. In particular, if `create` is set to true and the function had - * to create the new node, the dereferenced value will always be NULL. - * - * @param node node to perform lookup in. 
If node is a full leaf, the return - * value will always be NULL, regardless of the value of create. - * @param create whether to create the TrieInnerNode if it isn't present - * yet. If this is set to true, the function will never return NULL unless the - * node represents a leaf with a string, because the struct and therefore the - * address is created if it doesn't exist yet. - */ -TrieNode **tnode_search(TrieNode *node, const char c, bool create) { - // It can happen that the node has no initialized root yet - if (node->tree_size == 0) { - if (create) { - node->tree_size++; - node->tree = tinode_init(c); - - return &node->tree->next; - } - - return NULL; - } - - TrieInnerNode *parent = node->tree; - TrieInnerNode *child; - - // Iterate through the tree until we either find the character or realize it's - // not present in the tree - // FIXME don't use while (1) - while (1) { - if (parent->key == c) { - return &parent->next; - } else if (c < parent->key) { - child = parent->left; - } else { - child = parent->right; - } - - if (child == NULL) { - break; - } - - parent = child; - }; - - // child is NULL, meaning the character isn't in the binary tree yet. - - // If create is true, we create the new node so that we can still return a - // non-NULL pointer. - if (create) { - TrieInnerNode *new_node = tinode_init(c); - - if (c < parent->key) { - parent->left = new_node; - } else { - parent->right = new_node; - } - - node->tree_size++; - - return &new_node->next; - } - - return NULL; -} - -/** - * Split a remaining string leaf node in two. This function assumes it receives - * a full leaf as its input. 
- * - * @param node node to split - */ -/* void tnode_split(TrieNode *node) { */ -/* TrieNode *new_node = tnode_init(); */ -/* char key = node->ptr.string[0]; */ - -/* // There's a chance the remaining string was only 1 character, meaning the - * new */ -/* // node doesn't have to store a string */ -/* if (node->ptr.string[1] != DELIMITER) { */ -/* tnode_set_string(new_node, node->ptr.string + 1); */ -/* } else { */ -/* new_node->type = 1; */ -/* } */ - -/* new_node->entry = node->entry; */ - -/* node->type = 0; */ -/* node->size = 0; */ -/* node->entry = NULL; */ - -/* free(node->ptr.string); */ -/* node->ptr.string = NULL; */ - -/* // Initialize node's binary tree with the correct character */ -/* TrieNode **node_ptr = tnode_search(node, key, true); */ -/* *node_ptr = new_node; */ -/* } */ - -/* - * Remove the given character from a TrieInnerNode's subtree. The - * function assumes the character is indeed in the subtree. - */ -void tinode_remove(TrieInnerNode *node, const char c) { - TrieInnerNode **to_remove_ptr = &node; - - // We use pointers to pointers here so we can later free the removed node - // without having to know what its parent is - while ((*to_remove_ptr)->key != c) { - to_remove_ptr = (c < (*to_remove_ptr)->key) ? 
&(*to_remove_ptr)->left - : &(*to_remove_ptr)->right; - }; - - // If the node isn't a leaf, we have to replace it with another - if ((*to_remove_ptr)->left != NULL || (*to_remove_ptr)->right != NULL) { - TrieInnerNode *to_replace = *to_remove_ptr; - - // Replace with its only right child - if (to_replace->left == NULL) { - TrieInnerNode *to_remove = to_replace->right; - - to_replace->key = to_remove->key; - to_replace->next = to_remove->next; - to_replace->left = to_remove->left; - to_replace->right = to_remove->right; - - free(to_remove); - } - // Replace with its only left child - else if (to_replace->right == NULL) { - TrieInnerNode *to_remove = to_replace->left; - - to_replace->key = to_remove->key; - to_replace->next = to_remove->next; - to_replace->left = to_remove->left; - to_replace->right = to_remove->right; - - free(to_remove); - } - // Node has two children, so replace with successor - else { - TrieInnerNode *to_remove_parent = to_replace; - TrieInnerNode *to_remove = to_replace->right; - - while (to_remove->left != NULL) { - to_remove_parent = to_remove; - to_remove = to_remove->left; - } - - to_replace->key = to_remove->key; - to_replace->next = to_remove->next; - - if (to_remove_parent != to_replace) { - to_remove_parent->left = to_remove->right; - } else { - to_remove_parent->right = to_remove->right; - } - - free(to_remove); - } - } - // We're the leaf, so we free ourselves - else { - free(*to_remove_ptr); - *to_remove_ptr = NULL; - } -} - -/** - * Remove the given character from a TrieNode, respecting the rules - * of a binary search tree. This function assumes the character is in the search - * tree. 
- * - * @param node node to remove character from - * @param c character to remove - */ -void tnode_remove(TrieNode *node, const char c) { - tinode_remove(node->tree, c); - - node->tree_size--; -} diff --git a/src/trie/trie_node.h b/src/trie/trie_node.h deleted file mode 100644 index 229f5a6..0000000 --- a/src/trie/trie_node.h +++ /dev/null @@ -1,53 +0,0 @@ -#include - -#include "trie.h" - -/** - * Represents a node of the binary tree contained within each non-leaf - * TrieNode. - */ -typedef struct tinode { - struct tinode *left; - struct tinode *right; - struct tnode *next; - char key; -} TrieInnerNode; - -/** - * Represents a node inside a Trie. A node can be in one of three states: - * - Internal node: a node that's part of a path to a leaf node. This node will - * always have a size greater than one, and an initialized root. - * - Leaf: a node solely used to represent a string ending there. Its size is 0, - * its ptr is unitialized and represents is true. - * - Full leaf: a leaf node that contains a string. This occurs when a string is - * added whose path is not fully in the tree yet, causing its remaining suffix - * to be stored as a single node. Its size will be zero, represents its true, - * and its string pointer is initialized. 
- */ -typedef struct tnode { - Entry *entry; - - TrieInnerNode *tree; - uint8_t tree_size; - - // Skips are at most TRIE_MAX_SKIP_SIZE characters, and are stored in the - // nodes - char string[TRIE_MAX_SKIP_SIZE]; - uint8_t string_len; - - bool represents; -} TrieNode; - -TrieInnerNode *tinode_init(char c); - -TrieNode *tnode_init(); - -void tinode_free_cascade(TrieInnerNode *node); - -void tnode_free(TrieNode *node); - -TrieNode **tnode_search(TrieNode *node, const char c, bool create); - -void tinode_remove(TrieInnerNode *node, const char c); - -void tnode_remove(TrieNode *node, const char c); diff --git a/test/trie/test_trie.c b/test/trie/test_trie.c deleted file mode 100644 index 7a7e8d3..0000000 --- a/test/trie/test_trie.c +++ /dev/null @@ -1,189 +0,0 @@ -#include "test.h" -#include "trie.h" -#include "fuzzy.h" - -#define TEST_SIZE(ct, size) \ - TEST_CHECK(trie_size(ct) == size); \ - TEST_MSG("Size: %zu", trie_size(ct)) - -# define TRIE_INIT() \ - Trie *ct; \ - trie_init(&ct, NULL); \ - TEST_CHECK(ct != NULL) - -void test_init() { - TRIE_INIT(); - TEST_SIZE(ct, 0); - trie_free(ct); -} - -void test_add_one() { - TRIE_INIT(); - - Entry *entry = entry_new(Redirect, ""); - const char* string = "this is a test"; - - TEST_CHECK(trie_add(ct, string, entry) == Ok); - Entry *entry2; - TEST_CHECK(trie_search(ct, &entry2, string) == Ok); - TEST_CHECK(entry == entry2); - TEST_SIZE(ct, 1); - trie_free(ct); -} - -void test_add_prefix() { - TRIE_INIT(); - - const char *s1 = "halloween-2022"; - const char *s2 = "halloween-202"; - - Entry *entry1 = entry_new(Redirect, ""); - Entry *entry2 = entry_new(Redirect, ""); - - TEST_CHECK(trie_add(ct, s1, entry1) == Ok); - TEST_CHECK(trie_add(ct, s2, entry2) == Ok); - - Entry *entry3; - - TEST_CHECK(trie_search(ct, &entry3, s1) == Ok); - TEST_CHECK(entry3 == entry1); - entry2 = NULL; - - TEST_CHECK(trie_search(ct, &entry3, s2) == Ok); - TEST_CHECK(entry3 == entry2); - - trie_free(ct); -} - -void test_search_not_present() { - 
TRIE_INIT(); - - TEST_CHECK(trie_add(ct, "this string exists", NULL) == Ok); - Entry *entry; - TEST_CHECK(trie_search(ct, &entry, "this string does not exist") == NotFound); - - trie_free(ct); -} - -void test_add_more() { - TRIE_INIT(); - - const char* one = "one"; - const char* two = "two"; - const char* twenty = "twenty"; - const char* twentytwo = "twentytwo"; - - Entry *entry = entry_new(Redirect, ""); - - TEST_CHECK(trie_add(ct, one, entry) == Ok); - TEST_CHECK(trie_add(ct, two, entry) == Ok); - TEST_CHECK(trie_add(ct, twenty, entry) == Ok); - TEST_CHECK(trie_add(ct, twentytwo, entry) == Ok); - - TEST_SIZE(ct, 4); - - Entry *entry2; - TEST_CHECK(trie_search(ct, &entry2, one) == Ok); - TEST_CHECK(entry2 == entry); - entry2 = NULL; - - TEST_CHECK(trie_search(ct, &entry2, two) == Ok); - TEST_CHECK(entry2 == entry); - entry2 = NULL; - - TEST_CHECK(trie_search(ct, &entry2, twenty) == Ok); - TEST_CHECK(entry2 == entry); - entry2 = NULL; - - TEST_CHECK(trie_search(ct, &entry2, twentytwo) == Ok); - TEST_CHECK(entry2 == entry); - entry2 = NULL; - - TEST_CHECK(trie_add(ct, one, NULL) == AlreadyPresent); - TEST_CHECK(trie_add(ct, two, NULL) == AlreadyPresent); - TEST_CHECK(trie_add(ct, twenty, NULL) == AlreadyPresent); - TEST_CHECK(trie_add(ct, twentytwo, NULL) == AlreadyPresent); - - trie_free(ct); -} - -/* void test_remove_one() { */ -/* Trie* ct = trie_init(); */ -/* TEST_CHECK(ct != NULL); */ - -/* const char* string = "this is a test"; */ -/* TEST_CHECK(trie_add(ct, string, NULL)); */ -/* TEST_SIZE(ct, 1); */ - -/* TEST_CHECK(trie_remove(ct, string)); */ -/* TEST_SIZE(ct, 0); */ - -/* trie_free(ct); */ -/* } */ - -/* void test_remove_more() { */ -/* Trie* ct = trie_init(); */ -/* TEST_CHECK(ct != NULL); */ - -/* const char* one = "one"; */ -/* const char* two = "two"; */ -/* const char* twenty = "twenty"; */ -/* const char* twentytwo = "twentytwo"; */ -/* TEST_CHECK(trie_add(ct, one, NULL)); */ -/* TEST_CHECK(trie_add(ct, two, NULL)); */ -/* TEST_CHECK(trie_add(ct, 
twenty, NULL)); */ -/* TEST_CHECK(trie_add(ct, twentytwo, NULL)); */ - -/* TEST_SIZE(ct, 4); */ - -/* TEST_CHECK(trie_remove(ct, one)); */ -/* TEST_CHECK(trie_remove(ct, two)); */ -/* TEST_CHECK(trie_remove(ct, twenty)); */ -/* TEST_CHECK(trie_remove(ct, twentytwo)); */ - -/* TEST_SIZE(ct, 0); */ - -/* trie_free(ct); */ -/* } */ - -/* void test_remove_not_present() { */ -/* Trie* ct = trie_init(); */ -/* TEST_CHECK(ct != NULL); */ - -/* TEST_CHECK(trie_add(ct, "this string exists", NULL)); */ -/* TEST_CHECK(!trie_remove(ct, "this string does not exist")); */ - -/* trie_free(ct); */ -/* } */ - -// Test seeds that are known to fail so we don't get regressions -void test_fuzzy_set() { - FuzzyConfig configs[] = { - { 403318210, 5, 500}, - { 588218406, 16, 460}, - { 297512224, 21, 500}, - { 403318210, 5, 500} - }; - - int count = sizeof(configs) / sizeof(FuzzyConfig); - int res; - - for (int i = 0; i < count; i++) { -res = fuzzy_test_trie_seed(configs[i]); - TEST_CHECK_(res == 0, - "Failed config, seed = %i, len = %i, count = %i, code=%i", configs[i].seed, configs[i].word_length, configs[i].word_count, res); - } -} - -TEST_LIST = { - {"trie init",test_init }, - { "trie add one",test_add_one }, - { "trie add more",test_add_more }, - { "trie search not present",test_search_not_present}, - - /* { "trie remove one",test_remove_one }, */ - /* { "trie remove more",test_remove_more }, */ - /* { "trie remove not present",test_remove_not_present}, */ - { "trie fuzzy set", test_fuzzy_set }, - { NULL, NULL} -}; diff --git a/test/trie/test_trie_fuzzy.c b/test/trie/test_trie_fuzzy.c deleted file mode 100644 index 33a2e24..0000000 --- a/test/trie/test_trie_fuzzy.c +++ /dev/null @@ -1,34 +0,0 @@ -#include "test.h" -#include "trie.h" -#include "fuzzy.h" - -void test_fuzzy() { - // Randomize seed - srand(time(NULL)); - - FuzzyConfig config; - int counter = 0; - int res; - - for (int len = 1; len < 25; len += 5) { - for (int count = 10; count <= 500; count += 10) { - for (int i = 0; i < 
50; i++) { - counter++; - - config.seed = rand(); - config.word_length = len; - config.word_count = count; - -res = fuzzy_test_trie_seed(config); - TEST_CHECK_(res == 0, - "Failed config, seed = %i, len = %i, count = %i, code = %i", config.seed, config.word_length, config.word_count, res); - } - } - } - TEST_MSG("fuzzy tests done = %i", counter); -} - -TEST_LIST = { - { "customtrie fuzzy", test_fuzzy }, - { NULL, NULL} -};