17 changed files with 3338 additions and 22 deletions
--- a/.woodpecker/build.yml
+++ b/.woodpecker/build.yml
@ -38,13 +38,9 @@ pipeline:
    secrets:
      - minio_access_key
      - minio_secret_key
    when:
      branch:
        exclude: [ release/* ]
      event: push
  publish-rel:
-    image: 'curlimages/curl'
+    image: 'alpine:3.18.0'
    group: publish
    commands:
      - >
@ -57,7 +53,5 @@ pipeline:
        --user "Chewing_Bever:$GITEA_PASSWORD"
        --upload-file landerctl/build/landerctl
        https://git.rustybever.be/api/packages/Chewing_Bever/generic/lander/"${CI_COMMIT_TAG}"/landerctl-"$(echo '${PLATFORM}' | sed 's:/:-:g')"
    secrets:
      - gitea_password
    when:
      event: tag
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -7,8 +7,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased](https://git.rustybever.be/Chewing_Bever/lander/src/branch/dev)
 ## [0.2.0](https://git.rustybever.be/Chewing_Bever/lander/src/tag/0.2.0)
 ### Added
 * HTTP Loop
--- a/README.md
+++ b/README.md
@ -1,16 +1,5 @@
 # Lander
 Lander is an HTTP/1.1 server that acts as a URL shortener, pastebin and
 file-sharing service. It's written from the ground up in C, complete with an
 HTTP framework built on top of an event loop implementation based on [Build
 Your Own Redis with C/C++](https://build-your-own.org/redis/). Lookup of
 entries is done using an in-memory trie data structure, and on-disk storage
 uses a custom binary database format.
 The codebase uses one thirdparty library, namely
 [picohttpparser](https://github.com/h2o/picohttpparser) for parsing HTTP
 requests.
 ## The idea
 A URL shortener has always been on my list of things I'd like to write myself.
@ -21,6 +10,12 @@ different tries (Patricia trie, ternary trie, and a custom one). Considering
 these are efficient string-based search trees, this gave me the idea to use it
 as the backend for a URL shortener!
 This implementation currently uses a ternary trie as its search tree. The
 persistence model is very simple; I simply append a line to a text file every
 time a URL is added, and add the lines of this file to the trie on startup. The
 trie is stored completely in memory, and no I/O operations are required when
 requesting a redirect. This makes the server very fast.
 ## The name
 I gave up giving my projects original names a long time ago, so now I just use
--- a/config.mk
+++ b/config.mk
@ -1,4 +1,4 @@
-VERSION := 0.2.0
+VERSION := 0.1.0
 BIN_FILENAME = lander
@ -7,7 +7,7 @@ SRC_DIR        = src
 TEST_DIR       = test
 THIRDPARTY_DIR = thirdparty
-INC_DIRS  = include $(THIRDPARTY_DIR)/include lsm/include
+INC_DIRS  = include $(THIRDPARTY_DIR)/include trie/include lsm/include
 LIBS      = m lsm
 LIB_DIRS  = ./lsm/build
--- a/trie/Makefile
+++ b/trie/Makefile
@ -0,0 +1,95 @@
 # https://spin.atomicobject.com/2016/08/26/makefile-c-projects/ was a great
 # base for this Makefile
 -include config.mk
 # Remove a half-written target when its recipe fails, so a truncated archive or
 # object file is never mistaken for an up-to-date one.
 .DELETE_ON_ERROR:
 LIB := $(BUILD_DIR)/$(LIB_FILENAME)
 # NOTE: the `!=` shell assignment requires GNU Make >= 4.0
 SRCS != find '$(SRC_DIR)' -iname '*.c'
 SRCS_H != find $(INC_DIRS) -iname '*.h'
 SRCS_H_INTERNAL != find $(SRC_DIR) -iname '*.h'
 SRCS_TEST != find '$(TEST_DIR)' -iname '*.c'
 OBJS := $(SRCS:%=$(BUILD_DIR)/%.o)
 OBJS_TEST := $(SRCS_TEST:%=$(BUILD_DIR)/%.o)
 DEPS := $(SRCS:%=$(BUILD_DIR)/%.d) $(SRCS_TEST:%=$(BUILD_DIR)/%.d)
 BINS_TEST := $(OBJS_TEST:%.c.o=%)
 TARGETS_TEST := $(BINS_TEST:%=test-%)
 TARGETS_MEM_TEST := $(BINS_TEST:%=test-mem-%)
 _CFLAGS := $(addprefix -I,$(INC_DIRS)) $(CFLAGS) -Wall -Wextra
 .PHONY: all
 all: lib
 # =====COMPILATION=====
 # Utility used by the CI to lint
 .PHONY: objs
 objs: $(OBJS)
 .PHONY: lib
 lib: $(LIB)
 # Use $(AR) rather than a hard-coded `ar` so the archiver can be overridden
 # (e.g. for cross-compilation)
 $(LIB): $(OBJS)
 	$(AR) -rcs $@ $(OBJS)
 $(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c
 	mkdir -p $(dir $@)
 	$(CC) -c $(_CFLAGS) $< -o $@
 # =====TESTING=====
 .PHONY: test
 test: $(TARGETS_TEST)
 .PHONY: test-mem
 test-mem: $(TARGETS_MEM_TEST)
 .PHONY: $(TARGETS_TEST)
 $(TARGETS_TEST): test-%: %
 	./$^
 .PHONY: $(TARGETS_MEM_TEST)
 $(TARGETS_MEM_TEST): test-mem-%: %
 	valgrind --tool=memcheck --error-exitcode=1 --track-origins=yes --leak-check=full ./$^
 .PHONY: build-test
 build-test: $(BINS_TEST)
 $(BINS_TEST): %: %.c.o $(LIB)
 	$(CC) \
 		$^ -o $@
 # Along with the include directory, each test includes $(TEST_DIR) (which
 # contains the acutest.h header file), and the src directory of the module it's
 # testing. This allows tests to access internal methods, which aren't publicly
 # exposed.
 $(BUILD_DIR)/$(TEST_DIR)/%.c.o: $(TEST_DIR)/%.c
 	mkdir -p $(dir $@)
 	$(CC) $(_CFLAGS) -I$(TEST_DIR) \
 		-I$(dir $(@:$(BUILD_DIR)/$(TEST_DIR)/%=$(SRC_DIR)/%)) \
 		-c $< -o $@
 # =====MAINTENANCE=====
 .PHONY: lint
 lint:
 	clang-format -n --Werror $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL)
 .PHONY: fmt
 fmt:
 	clang-format -i $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL)
 .PHONY: clean
 clean:
 	rm -rf $(BUILD_DIR)
 # Generate a compilation database. The sub-makes are invoked through $(MAKE)
 # so that command-line variables and flags (-j, -n, ...) are propagated.
 .PHONY: bear
 bear: clean
 	bear -- $(MAKE)
 	bear --append -- $(MAKE) build-test
 # Make make aware of the .d files
 -include $(DEPS)
--- a/trie/README.md
+++ b/trie/README.md
@ -0,0 +1,16 @@
 # Trie design
 The underlying data structure is based on a combination of a ternary and a
 Patricia trie.
 * Nodes are classic ternary trie nodes, meaning each node contains a binary
  search tree
 * Each node can define a skip, like a Patricia trie, of at most 8 characters.
  These skipped characters are stored directly in the structs defining the
  nodes.
 * While the add function relies on the fact that the input is a NUL-terminated
  C string, the trie itself does not store any NUL bytes.
 The goal of this data structure is to be as optimized as possible for search
 operations with short (usually < 8 characters) keys, as this is by far the most
 common operation for a URL shortener/pastebin.
--- a/trie/config.mk
+++ b/trie/config.mk
@ -0,0 +1,13 @@
 # Name of the static library archive and the directory layout of the module
 LIB_FILENAME := libtrie.a
 BUILD_DIR := build
 SRC_DIR := src
 TEST_DIR := test
 INC_DIRS := include
 # Compiler flags:
 # -MMD: emit a .d dependency file next to every object file. These files are
 #  included by the Makefile, so an object is rebuilt when only a header it
 #  depends on has changed.
 # -MP: emit a dummy target for every header file (according to the docs it
 #  prevents some errors when removing header files)
 # -g: include debug information
 CFLAGS := -MMD -MP -g
--- a/trie/include/trie.h
+++ b/trie/include/trie.h
@ -0,0 +1,148 @@
 #ifndef AD3_TERNARYTRIE
 #define AD3_TERNARYTRIE
 #define ALPHABET_SIZE 256
 #define DELIMITER '\0'
 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
 // Should not be higher than 254 or stuff will break
 #define TRIE_MAX_SKIP_SIZE 8
 /**
 * The implementation of a Ternary Trie.
 *
 * Each node should be represented by a binary tree in order to reduce the
 * memory usage.
 */
 #include <stdbool.h>
 #include <stddef.h>
 #include <string.h>
 static const char charset[] =
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
 static const size_t charset_len = sizeof(charset) - 1;
 // Length of randomly generated keys
 #define RANDOM_KEY_LENGTH_SHORT 4
 #define RANDOM_KEY_LENGTH_LONG 16
 /**
 * Type definition for the struct representing the current Trie.
 *
 * You can (and should) redefine this in your c-file with the concrete fields.
 */
 typedef struct ttrie Trie;
 // Kind of payload an Entry carries. In the data file these are serialized as
 // a single character by entry_type_to_char / entry_type_from_char.
 typedef enum entry_type { Redirect, Paste, Unknown } EntryType;
 // A value stored in the trie: its type plus a string payload. `string` may be
 // NULL (entry_new accepts a NULL string).
 typedef struct entry {
  EntryType type;
  char *string;
 } Entry;
 // Status codes returned by the trie functions. Ok is guaranteed to be 0, so
 // any non-zero value signals a failure.
 typedef enum trie_exit_code {
  Ok = 0,
  // The requested key is not present in the trie
  NotFound,
  // The key to add is already present in the trie
  AlreadyPresent,
  // The backing data file could not be opened
  FileError
 } TrieExitCode;
 Entry *entry_new(EntryType type, const char *string);
 /**
 * Allocate & initialize a new trie, and populate it with the data from the
 * given data file.
 *
 * @return 0 if everything was successful, non-zero otherwise
 */
 TrieExitCode trie_init(Trie **trie_ptr, const char *file_path);
 /**
 * De-allocate a trie by freeing the memory occupied by this trie.
 *
 * @param trie which should be freed
 */
 void trie_free(Trie *trie);
 /**
 * Search for an entry in the trie.
 *
 * @param trie
 * @param entry_ptr pointer to Entry will be stored here, if found
 * @param key key representing the entry
 * @return 0 if the search was successful, 1 if not found
 */
 TrieExitCode trie_search(Trie *trie, Entry **entry_ptr, const char *key);
 TrieExitCode trie_search_len(Trie *trie, Entry **entry_ptr, const char *key,
                             size_t key_len);
 /**
 * Add a string to this trie.
 *
 * @param trie
 * @param key key to represent entry with
 * @param entry entry to add
 * @return 0 if added, 1 if already in trie, something else if other errors
 */
 TrieExitCode trie_add(Trie *trie, const char *key, Entry *entry);
 TrieExitCode trie_add_len(Trie *trie, const char *key, size_t key_len,
                          Entry *entry);
 /**
 * Add an entry by generating a random string as the key.
 *
 * @param trie
 * @param key_ptr on success, receives a pointer to the newly allocated,
 * NUL-terminated generated key. This pointer is safe to use after unlocking
 * the trie, and should be freed manually. It is not written on failure.
 * @param entry entry to add
 * @param secure whether to generate a longer, more secure random key
 * @return 0 if the entry was added, non-zero otherwise
 */
 TrieExitCode trie_add_random(Trie *trie, char **key_ptr, Entry *entry,
                             bool secure);
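 /**
 * Remove an entry from this trie given its key.
 *
 * NOTE(review): the implementation of this function in trie.c appears to be
 * commented out; confirm it is defined before calling it.
 *
 * @param trie
 * @param key key representing entry
 * @return true if the entry was present and has been removed, false if it was
 * not present
 */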
 bool trie_remove(Trie *trie, const char *key);
 /**
 * Returns the number of entries in this trie.
 *
 * @param trie
 * @return the number of entries in this trie
 */
 size_t trie_size(Trie *trie);
 /*
 * Acquire a read lock on the trie.
 *
 * @return 0 if successful, non-zero otherwise (return value of
 * pthread_rwlock_rdlock)
 */
 int trie_rlock(Trie *trie);
 /*
 * Acquire a write lock on the trie.
 *
 * @return 0 if successful, non-zero otherwise (return value of
 * pthread_rwlock_wrlock)
 */
 int trie_wlock(Trie *trie);
 /*
 * Release the lock on a trie after having acquired it beforehand.
 *
 * @return 0 if successful, non-zero otherwise (return value of
 * pthread_rwlock_unlock)
 */
 int trie_unlock(Trie *trie);
 #endif // AD3_TERNARYTRIE
--- a/trie/src/trie/trie.c
+++ b/trie/src/trie/trie.c
@ -0,0 +1,428 @@
 #include <pthread.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "trie.h"
 #include "trie_entry.h"
 #include "trie_node.h"
 // Concrete definition of the opaque Trie type declared in trie.h.
 typedef struct ttrie {
  // Root node of the trie; allocated in trie_init
  TrieNode *root;
  // Number of entries stored; incremented on every successful insertion
  size_t size;
  // Copy of the data file path, or NULL if the trie isn't backed by a file
  char *file_path;
  // Read-write lock exposed through trie_rlock / trie_wlock / trie_unlock
  pthread_rwlock_t lock;
 } Trie;
 TrieExitCode trie_add_no_lock(Trie *trie, const char *key, Entry *entry);
 /**
 * Allocate & initialize a new trie and, if a data file is given, populate it
 * with the entries stored in that file.
 *
 * Every line of the data file is expected to have the form
 * `<key> <type> <payload>`, where `<type>` is a single character understood by
 * entry_type_from_char.
 *
 * @param trie_ptr where to store the pointer to the new trie; only written on
 * success
 * @param file_path path to the data file, or NULL to create an empty trie that
 * isn't backed by a file
 * @return Ok on success, FileError if the file can't be opened or contains a
 * malformed line, or the status of the failed insertion otherwise
 */
 TrieExitCode trie_init(Trie **trie_ptr, const char *file_path) {
  // Allocate & initialize trie
  Trie *trie = calloc(1, sizeof(Trie));
  trie->root = tnode_init();
  pthread_rwlock_init(&trie->lock, NULL);
  if (file_path == NULL) {
    trie->file_path = NULL;
    *trie_ptr = trie;
    return Ok;
  }
  trie->file_path = strdup(file_path);
  TrieExitCode status = Ok;
  // Populate trie with data from file
  FILE *fp = fopen(file_path, "r");
  if (fp == NULL) {
    status = FileError;
  } else {
    // We read in lines of at most 8192 characters (sounds like enough)
    char buffer[8192];
    while (fgets(buffer, sizeof(buffer), fp)) {
      // The key runs up to the first space character
      char *sep = strchr(buffer, ' ');
      // After the key we need at least the type character and the space that
      // follows it; anything shorter would make us read past the end of the
      // line.
      if (sep == NULL || sep[1] == '\0' || sep[2] == '\0') {
        status = FileError;
        break;
      }
      // Split the buffer into two strings, the key and the payload
      *sep = '\0';
      EntryType type = entry_type_from_char(sep[1]);
      // Skip type character & its surrounding spaces
      char *payload = sep + 3;
      // Remove the newline character, if any (the last line of a file isn't
      // necessarily terminated by one)
      payload[strcspn(payload, "\n")] = '\0';
      Entry *entry = entry_new(type, payload);
      status = trie_add_no_lock(trie, buffer, entry);
      if (status != Ok) {
        // A failed insertion never stores the entry, so it's still ours
        if (entry != NULL) {
          free(entry->string);
          free(entry);
        }
        break;
      }
    }
    fclose(fp);
  }
  if (status != Ok) {
    // trie_free releases the node structure and the trie itself; the path
    // copy is released here and cleared so it can never be freed twice.
    free(trie->file_path);
    trie->file_path = NULL;
    trie_free(trie);
    return status;
  }
  *trie_ptr = trie;
  return Ok;
 }
 /**
 * De-allocate a trie: its node structure, the copy of the data file path, its
 * lock and the trie struct itself.
 *
 * The lock must not be held by anyone when this function is called.
 *
 * @param trie trie to free; NULL is accepted and ignored
 */
 void trie_free(Trie *trie) {
  if (trie == NULL) {
    return;
  }
  tnode_free(trie->root);
  // file_path is NULL for tries that aren't backed by a file, which free
  // handles fine
  free(trie->file_path);
  pthread_rwlock_destroy(&trie->lock);
  free(trie);
 }
 // Result of an internal node lookup: `child` is the node representing the
 // key and `parent` the node it was reached from. Both are NULL if the key
 // wasn't found.
 typedef struct searchresult {
  TrieNode *parent;
  TrieNode *child;
 } SearchResult;
 /**
 * Look up the node representing the given key.
 *
 * @param trie trie to look in
 * @param key key to look up; only the first key_len bytes are read
 * @param key_len length of the key in bytes
 * @return the matching node and the node it was reached from, or a result with
 * both fields set to NULL if the key isn't present
 */
 SearchResult trie_search_node_len(Trie *trie, const char *key, size_t key_len) {
  SearchResult out = {NULL, NULL};
  // The lookup below always consumes at least one character, so an empty key
  // can never match. Returning here also avoids reading key[0] of an empty
  // buffer.
  if (key_len == 0) {
    return out;
  }
  size_t i = 0;
  TrieNode **node_ptr = &(trie->root);
  TrieNode **child_ptr;
  do {
    child_ptr = tnode_search(*node_ptr, key[i], false);
    // We don't have to check whether *child_ptr is NULL, because if it was
    // NULL, it wouldn't be in the binary tree.
    if (child_ptr == NULL) {
      return out;
    }
    i++;
    // The node's skipped characters have to match the key in full. If fewer
    // characters remain in the key than the node skips, the key can't be
    // present, and comparing anyway would read past the end of the key.
    size_t skip_len = (*child_ptr)->string_len;
    if (skip_len > key_len - i) {
      return out;
    }
    if (memcmp((*child_ptr)->string, key + i, skip_len) != 0) {
      return out;
    }
    i += skip_len;
    if (i < key_len) {
      node_ptr = child_ptr;
    }
  } while (i < key_len);
  // At this point we've traversed the entire key, so all that's left to check
  // is whether the node we ended up in represents a stored key.
  if (i == key_len && (*child_ptr)->represents) {
    out.parent = *node_ptr;
    out.child = *child_ptr;
  }
  return out;
 }
 /**
 * Variant of trie_search_node_len for NUL-terminated keys.
 *
 * @param trie trie to look in
 * @param key NUL-terminated key to look up
 * @return see trie_search_node_len
 */
 SearchResult trie_search_node(Trie *trie, const char *key) {
  const size_t key_len = strlen(key);
  return trie_search_node_len(trie, key, key_len);
 }
 /**
 * Look up the entry stored under the given key.
 *
 * @param trie trie to look in
 * @param entry_ptr receives the pointer to the stored entry if the key is
 * found; left untouched otherwise
 * @param key key to look up
 * @param key_len length of the key in bytes
 * @return Ok if the key is present in the trie, NotFound otherwise
 */
 TrieExitCode trie_search_len(Trie *trie, Entry **entry_ptr, const char *key,
                             size_t key_len) {
  TrieNode *match = trie_search_node_len(trie, key, key_len).child;
  if (match != NULL) {
    *entry_ptr = match->entry;
    return Ok;
  }
  return NotFound;
 }
 /**
 * Variant of trie_search_len for NUL-terminated keys.
 */
 TrieExitCode trie_search(Trie *trie, Entry **entry_ptr, const char *key) {
  const size_t key_len = strlen(key);
  return trie_search_len(trie, entry_ptr, key, key_len);
 }
 /**
 * Add the given key to the trie, without touching the backing data file.
 *
 * @param trie trie to add the key to
 * @param key key to add; only the first key_len bytes are read
 * @param key_len length of the key in bytes
 * @param entry entry to store under the key
 * @return Ok if the key wasn't present in the trie and thus added,
 * AlreadyPresent otherwise
 */
 TrieExitCode trie_add_len_no_lock(Trie *trie, const char *key, size_t key_len,
                                  Entry *entry) {
  size_t i = 0;
  uint8_t offset;
  TrieNode **node_ptr = &(trie->root);
  TrieNode **child_node_ptr;
  TrieNode *child_node;
  do {
    offset = 0;
    child_node_ptr = tnode_search(*node_ptr, key[i], true);
    i++;
    // We've reached a NULL child, so we add the remaining part of the string
    // here
    if (*child_node_ptr == NULL) {
      child_node = tnode_init();
      // Store as much of the remaining key as fits in a single skip
      while (offset < TRIE_MAX_SKIP_SIZE && i + offset < key_len) {
        offset++;
      }
      memcpy(child_node->string, key + i, offset);
      child_node->string_len = offset;
      *child_node_ptr = child_node;
      // If the remaining part of the string is still longer than the maximum
      // allowed skip length, we continue through the loop. The next iteration
      // will enter this if statement again, and perform the same loop, until
      // the string is fully added to the trie.
      if (i + offset < key_len) {
        node_ptr = child_node_ptr;
        i += offset;
        continue;
      }
      child_node->represents = true;
      child_node->entry = entry;
      trie->size++;
      return Ok;
    }
    while (offset < (*child_node_ptr)->string_len) {
      // String no longer aligns with edge, so we have to split. Running out
      // of key characters in the middle of an edge counts as a mismatch as
      // well; checking this first ensures we never read past the end of the
      // key.
      if (i + offset >= key_len ||
          key[i + offset] != (*child_node_ptr)->string[offset]) {
        TrieNode *split_node = tnode_init();
        child_node = *child_node_ptr;
        // New string of the split node is the prefix that we were able
        // to skip
        if (offset > 0) {
          memcpy(split_node->string, child_node->string, offset);
          split_node->string_len = offset;
        }
        // split_node replaces child_node as the child of node
        *child_node_ptr = split_node;
        TrieNode **new_node_ptr =
            tnode_search(split_node, child_node->string[offset], true);
        *new_node_ptr = child_node;
        // child_node has now become a child of split_node, so we update its
        // string accordingly by removing the skipped prefix + the one
        // character that's already stored by being a child of split_node
        uint8_t new_skip_len = child_node->string_len - (offset + 1);
        if (new_skip_len > 0) {
          // Source and destination overlap, so we copy through a temporary
          // buffer
          char old_string[TRIE_MAX_SKIP_SIZE];
          memcpy(old_string, child_node->string + offset + 1, new_skip_len);
          memcpy(child_node->string, old_string, new_skip_len);
        }
        child_node->string_len = new_skip_len;
        // The while loop will exit either way after this has happened, as
        // child_node is now split_node and split_node's len is already set to
        // offset.
        break;
      }
      offset++;
    }
    node_ptr = child_node_ptr;
    i += offset;
  } while (i < key_len);
  // The entire key has been consumed and we ended up in an existing node
  if ((*child_node_ptr)->represents) {
    return AlreadyPresent;
  }
  (*child_node_ptr)->represents = true;
  (*child_node_ptr)->entry = entry;
  trie->size++;
  return Ok;
 }
 /**
 * Variant of trie_add_len_no_lock for NUL-terminated keys.
 */
 TrieExitCode trie_add_no_lock(Trie *trie, const char *key, Entry *entry) {
  const size_t key_len = strlen(key);
  return trie_add_len_no_lock(trie, key, key_len, entry);
 }
 /**
 * Add an entry to the trie and, if the trie is backed by a data file, append
 * it to that file as a `<key> <type> <payload>` line.
 *
 * This function doesn't acquire any lock itself.
 * NOTE(review): callers are presumably expected to hold the write lock (see
 * trie_wlock) -- confirm against the call sites.
 *
 * @param trie trie to add the entry to
 * @param key key to represent the entry with; only the first key_len bytes are
 * used
 * @param key_len length of the key in bytes
 * @param entry entry to add
 * @return Ok if added, AlreadyPresent if the key is already in the trie,
 * FileError if the data file couldn't be opened or written
 */
 TrieExitCode trie_add_len(Trie *trie, const char *key, size_t key_len,
                          Entry *entry) {
  if (trie->file_path != NULL) {
    // Easiest way to make sure we don't add duplicate entries to the file.
    // We use the internal search function, which doesn't lock.
    if (trie_search_node_len(trie, key, key_len).child != NULL) {
      return AlreadyPresent;
    }
    FILE *fp = fopen(trie->file_path, "a");
    if (fp == NULL) {
      return FileError;
    }
    // The key isn't necessarily NUL-terminated at key_len, so write exactly
    // key_len bytes
    fwrite(key, 1, key_len, fp);
    fputs(" ", fp);
    fputc(entry_type_to_char(entry->type), fp);
    fputs(" ", fp);
    // Entries are allowed to have no string
    if (entry->string != NULL) {
      fputs(entry->string, fp);
    }
    fputs("\n", fp);
    // Buffered write errors may only surface when the stream is closed
    bool write_failed = ferror(fp) != 0;
    if (fclose(fp) != 0) {
      write_failed = true;
    }
    if (write_failed) {
      return FileError;
    }
  }
  // This function *should* always return Ok. Otherwise, the function would've
  // exited because the string was found in the trie.
  return trie_add_len_no_lock(trie, key, key_len, entry);
 }
 /**
 * Variant of trie_add_len for NUL-terminated keys.
 */
 TrieExitCode trie_add(Trie *trie, const char *key, Entry *entry) {
  const size_t key_len = strlen(key);
  return trie_add_len(trie, key, key_len, entry);
 }
 /**
 * Store an entry under a freshly generated random key.
 *
 * @param trie trie to add the entry to
 * @param key_ptr receives the heap-allocated, NUL-terminated key if the entry
 * was added; the caller is responsible for freeing it
 * @param entry entry to add
 * @param secure whether to use the long key length instead of the short one
 * @return status of the insertion
 */
 TrieExitCode trie_add_random(Trie *trie, char **key_ptr, Entry *entry,
                             bool secure) {
  const size_t len = secure ? RANDOM_KEY_LENGTH_LONG : RANDOM_KEY_LENGTH_SHORT;
  char *candidate = malloc(len + 1);
  candidate[len] = '\0';
  // Keep drawing random keys until we hit one that isn't in the trie yet. The
  // key space is large enough for collisions to be very unlikely.
  do {
    for (size_t pos = 0; pos < len; pos++) {
      candidate[pos] = charset[rand() % charset_len];
    }
  } while (trie_search_node(trie, candidate).child != NULL);
  TrieExitCode status = trie_add(trie, candidate, entry);
  if (status != Ok) {
    free(candidate);
    return status;
  }
  *key_ptr = candidate;
  return status;
 }
 /**
 * Remove the given string from a Trie.
 *
 * @param trie trie to remove string from
 * @param string string to remove
 * @return true if the string was in the trie and thus removed, false otherwise
 */
 /* bool trie_remove(Trie *trie, const char *string) { */
 /*   pthread_rwlock_wrlock(&trie->lock); */
 /*   bool return_value = false; */
 /*   SearchResult res = trie_search_node(trie, string); */
 /*   if (res.child == NULL) { */
 /*     goto end; */
 /*   } */
 /*   trie->size--; */
 /*   return_value = true; */
 /*   if (res.parent != NULL) { */
 /*     // We're removing a full leaf, so we calculate the offset of the
 * character */
 /*     // to remove from the parent */
 /*     if (res.child->type == 2) { */
 /*       size_t str_len = strlen(string); */
 /*       size_t suffix_len = strlen(res.child->ptr.string); */
 /*       tnode_remove(res.parent, string[str_len - suffix_len - 1]); */
 /*     } */
 /*     // In the other case, the character to remove from the parent is the last
 */
 /*     // character of the string */
 /*     else if (res.child->size == 0) { */
 /*       size_t i = 0; */
 /*       while (string[i + 1] != DELIMITER) { */
 /*         i++; */
 /*       } */
 /*       tnode_remove(res.parent, string[i]); */
 /*     } else { */
 /*       res.child->type = 0; */
 /*       goto end; */
 /*     } */
 /*     tnode_free(res.child); */
 /*   } */
 /*   // We're in the root here */
 /*   else { */
 /*     res.child->type = 0; */
 /*   } */
 /* end: */
 /*   pthread_rwlock_unlock(&trie->lock); */
 /*   return return_value; */
 /* } */
 /**
 * Report how many entries the given trie currently holds.
 *
 * @param trie trie to query
 * @return number of entries stored in the trie
 */
 size_t trie_size(Trie *trie) {
  const size_t entry_count = trie->size;
  return entry_count;
 }
 // Acquire the trie's lock for reading; returns the result of
 // pthread_rwlock_rdlock
 int trie_rlock(Trie *trie) {
  pthread_rwlock_t *rwlock = &trie->lock;
  return pthread_rwlock_rdlock(rwlock);
 }
 // Acquire the trie's lock for writing; returns the result of
 // pthread_rwlock_wrlock
 int trie_wlock(Trie *trie) {
  pthread_rwlock_t *rwlock = &trie->lock;
  return pthread_rwlock_wrlock(rwlock);
 }
 // Release the trie's lock; returns the result of pthread_rwlock_unlock
 int trie_unlock(Trie *trie) {
  pthread_rwlock_t *rwlock = &trie->lock;
  return pthread_rwlock_unlock(rwlock);
 }
--- a/trie/src/trie/trie_entry.c
+++ b/trie/src/trie/trie_entry.c
@ -0,0 +1,37 @@
 #include "trie_entry.h"
 #include <stdlib.h>
 // Decode the single-character representation of an entry type. Any
 // character other than '0' or '1' maps to Unknown.
 EntryType entry_type_from_char(char c) {
  if (c == '0') {
    return Redirect;
  }
  if (c == '1') {
    return Paste;
  }
  return Unknown;
 }
 // Encode an entry type as a single character. Types without a character
 // representation map to the NUL character.
 char entry_type_to_char(EntryType et) {
  if (et == Redirect) {
    return '0';
  }
  if (et == Paste) {
    return '1';
  }
  return '\0';
 }
 /**
 * Allocate a new entry of the given type.
 *
 * @param type type of the entry
 * @param string payload of the entry; it is copied, so the caller keeps
 * ownership of the original. May be NULL, in which case the entry has no
 * string.
 * @return pointer to the newly allocated entry, or NULL if memory couldn't be
 * allocated
 */
 Entry *entry_new(EntryType type, const char *string) {
  Entry *entry = malloc(sizeof(Entry));
  if (entry == NULL) {
    return NULL;
  }
  entry->type = type;
  entry->string = NULL;
  if (string != NULL) {
    entry->string = strdup(string);
    // Don't hand out an entry that silently lost its payload
    if (entry->string == NULL) {
      free(entry);
      return NULL;
    }
  }
  return entry;
 }
--- a/trie/src/trie/trie_entry.h
+++ b/trie/src/trie/trie_entry.h
@ -0,0 +1,7 @@
 #ifndef AD3_TRIE_ENTRY
 #define AD3_TRIE_ENTRY
 #include "trie.h"
 // Convert between an entry type and the single character that represents it
 EntryType entry_type_from_char(char c);
 char entry_type_to_char(EntryType et);
 // Allocate a new entry holding a copy of the given string (which may be NULL)
 Entry *entry_new(EntryType type, const char *string);
 #endif // AD3_TRIE_ENTRY
--- a/trie/src/trie/trie_node.c
+++ b/trie/src/trie/trie_node.c
@ -0,0 +1,265 @@
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include "trie_node.h"
 /**
 * Create a zero-initialized TrieInnerNode for the given character. The new
 * node has no children and no `next` node yet.
 *
 * @param c character the node represents
 * @return pointer to the newly allocated node
 */
 TrieInnerNode *tinode_init(char c) {
  TrieInnerNode *inner = calloc(1, sizeof(*inner));
  inner->key = c;
  return inner;
 }
 /**
 * Allocate and initialize a new, empty TrieNode.
 *
 * The node is zero-initialized: it has no binary tree, an empty skip string,
 * no entry, and doesn't represent a key. Zeroing the entire struct (rather
 * than only the counters) ensures no field is ever read uninitialized.
 *
 * @return pointer to newly allocated struct, or NULL if the allocation failed
 */
 TrieNode *tnode_init(void) {
  return calloc(1, sizeof(TrieNode));
 }
 /**
 * Recursively release a TrieInnerNode: its left and right subtrees, the
 * TrieNode it points to, and finally the node itself. Calling this on the root
 * of a binary tree releases the entire tree.
 *
 * @param node root of the (sub)tree to free
 */
 void tinode_free_cascade(TrieInnerNode *node) {
  TrieInnerNode *lhs = node->left;
  if (lhs != NULL) {
    tinode_free_cascade(lhs);
  }
  TrieInnerNode *rhs = node->right;
  if (rhs != NULL) {
    tinode_free_cascade(rhs);
  }
  TrieNode *below = node->next;
  if (below != NULL) {
    tnode_free(below);
  }
  free(node);
 }
 /**
 * Release a TrieNode together with the binary tree it contains, if any.
 *
 * @param node node to free
 */
 void tnode_free(TrieNode *node) {
  const bool has_tree = node->tree_size > 0;
  if (has_tree) {
    tinode_free_cascade(node->tree);
  }
  // TODO properly free entry; the entry stored in the node is not released
  // here
  free(node);
 }
 /**
 * Look up a character in the binary tree of the given TrieNode.
 *
 * If the character is found, the return value is the address of the `next`
 * field of the TrieInnerNode representing it. If it isn't found, the inner
 * node is created when `create` is true, and NULL is returned otherwise.
 *
 * NOTE: a non-NULL return value does not mean that the dereferenced value is
 * also not NULL. In particular, if the function had to create the inner node,
 * the dereferenced value will always be NULL.
 *
 * @param node node to perform the lookup in
 * @param c character to look for
 * @param create whether to create the TrieInnerNode if it isn't present yet
 * @return address of the matching `next` field, or NULL if the character isn't
 * present and `create` is false
 */
 TrieNode **tnode_search(TrieNode *node, const char c, bool create) {
  // A node without a tree has nothing to search through; `tree` itself is
  // only valid once tree_size is non-zero
  if (node->tree_size == 0) {
    if (!create) {
      return NULL;
    }
    node->tree = tinode_init(c);
    node->tree_size++;
    return &node->tree->next;
  }
  // Walk down the binary search tree, keeping track of the link we followed so
  // a new node can be attached to it if needed
  TrieInnerNode **link = &node->tree;
  while (*link != NULL) {
    TrieInnerNode *current = *link;
    if (current->key == c) {
      return &current->next;
    }
    link = (c < current->key) ? &current->left : &current->right;
  }
  // The character isn't in the binary tree
  if (!create) {
    return NULL;
  }
  TrieInnerNode *fresh = tinode_init(c);
  *link = fresh;
  node->tree_size++;
  return &fresh->next;
 }
 /**
 * Split a remaining string leaf node in two. This function assumes it receives
 * a full leaf as its input.
 *
 * @param node node to split
 */
 /* void tnode_split(TrieNode *node) { */
 /*   TrieNode *new_node = tnode_init(); */
 /*   char key = node->ptr.string[0]; */
 /*   // There's a chance the remaining string was only 1 character, meaning the
 * new */
 /*   // node doesn't have to store a string */
 /*   if (node->ptr.string[1] != DELIMITER) { */
 /*     tnode_set_string(new_node, node->ptr.string + 1); */
 /*   } else { */
 /*     new_node->type = 1; */
 /*   } */
 /*   new_node->entry = node->entry; */
 /*   node->type = 0; */
 /*   node->size = 0; */
 /*   node->entry = NULL; */
 /*   free(node->ptr.string); */
 /*   node->ptr.string = NULL; */
 /*   // Initialize node's binary tree with the correct character */
 /*   TrieNode **node_ptr = tnode_search(node, key, true); */
 /*   *node_ptr = new_node; */
 /* } */
 /**
 * Remove the given character from a TrieInnerNode's subtree. The function
 * assumes the character is indeed in the subtree.
 *
 * The TrieNode the removed character pointed to is not freed.
 *
 * @param node root of the subtree to remove the character from
 * @param c character to remove
 */
 void tinode_remove(TrieInnerNode *node, const char c) {
  // Track the link leading to the node, so a leaf can be detached without
  // having to know what its parent is
  TrieInnerNode **link = &node;
  while ((*link)->key != c) {
    TrieInnerNode *current = *link;
    link = (c < current->key) ? &current->left : &current->right;
  }
  TrieInnerNode *victim = *link;
  const bool has_left = victim->left != NULL;
  const bool has_right = victim->right != NULL;
  // A leaf is simply freed and detached
  if (!has_left && !has_right) {
    free(victim);
    *link = NULL;
    return;
  }
  // Two children: take over the contents of the in-order successor (the
  // leftmost node of the right subtree) and unlink that node instead
  if (has_left && has_right) {
    TrieInnerNode *succ_parent = victim;
    TrieInnerNode *succ = victim->right;
    while (succ->left != NULL) {
      succ_parent = succ;
      succ = succ->left;
    }
    victim->key = succ->key;
    victim->next = succ->next;
    if (succ_parent == victim) {
      succ_parent->right = succ->right;
    } else {
      succ_parent->left = succ->right;
    }
    free(succ);
    return;
  }
  // Exactly one child: the node takes over that child's contents entirely,
  // after which the child struct itself is no longer needed
  TrieInnerNode *only_child = has_left ? victim->left : victim->right;
  victim->key = only_child->key;
  victim->next = only_child->next;
  victim->left = only_child->left;
  victim->right = only_child->right;
  free(only_child);
 }
 /**
 * Remove a character from the binary search tree of a TrieNode and update the
 * node's tree size accordingly. The character is assumed to be present in the
 * tree.
 *
 * @param node node to remove character from
 * @param c character to remove
 */
 void tnode_remove(TrieNode *node, const char c) {
  TrieInnerNode *tree_root = node->tree;
  tinode_remove(tree_root, c);
  node->tree_size -= 1;
 }
--- a/trie/src/trie/trie_node.h
+++ b/trie/src/trie/trie_node.h
@ -0,0 +1,53 @@
 #ifndef AD3_TRIE_NODE
 #define AD3_TRIE_NODE
 #include <stdbool.h>
 #include <stdint.h>
 #include "trie.h"
 /**
 * Represents a node of the binary search tree contained within a TrieNode.
 * Each inner node represents a single character (`key`) and points to the
 * TrieNode that continues the path for that character (`next`).
 */
 typedef struct tinode {
  struct tinode *left;
  struct tinode *right;
  struct tnode *next;
  char key;
 } TrieInnerNode;
 /**
 * Represents a node inside a Trie.
 *
 * - `tree` is the root of a binary search tree holding the characters that can
 *   follow this node. It's only valid if `tree_size` is greater than zero.
 * - `string` holds the `string_len` characters that are skipped when entering
 *   this node, like the edge label of a Patricia trie.
 * - `represents` indicates that a key ends in this node, in which case `entry`
 *   is the entry stored under that key.
 *
 * NOTE(review): `tree_size` is a uint8_t and is incremented for every distinct
 * child character, so it would wrap around to 0 if a node ever had 256
 * children -- confirm whether keys can contain every possible byte value.
 */
 typedef struct tnode {
  Entry *entry;
  TrieInnerNode *tree;
  uint8_t tree_size;
  // Skips are at most TRIE_MAX_SKIP_SIZE characters, and are stored in the
  // nodes
  char string[TRIE_MAX_SKIP_SIZE];
  uint8_t string_len;
  bool represents;
 } TrieNode;
 // Allocate a new TrieInnerNode representing the given character
 TrieInnerNode *tinode_init(char c);
 // Allocate a new, empty TrieNode
 TrieNode *tnode_init(void);
 // Free a TrieInnerNode, its subtrees and the TrieNodes they point to
 void tinode_free_cascade(TrieInnerNode *node);
 // Free a TrieNode and its binary tree
 void tnode_free(TrieNode *node);
 // Look up (and optionally create) the slot for a character in a TrieNode
 TrieNode **tnode_search(TrieNode *node, const char c, bool create);
 // Remove a character from a binary tree / from a TrieNode
 void tinode_remove(TrieInnerNode *node, const char c);
 void tnode_remove(TrieNode *node, const char c);
 #endif // AD3_TRIE_NODE
--- a/trie/test/test.h
+++ b/trie/test/test.h
--- a/trie/test/trie/fuzzy.h
+++ b/trie/test/trie/fuzzy.h
@ -0,0 +1,205 @@
 #ifndef AD3_FUZZYTEST
 #define AD3_FUZZYTEST
 #include <stdlib.h>
 #include <stdbool.h>
 #include <string.h>
 #include <stdio.h>
 #include "trie.h"
 /**
 * Parameters describing a single fuzzy test run.
 */
 typedef struct fuzzyconfig {
    // Seed handed to srand() before the test strings are generated
    int seed;
    // Size in bytes of every string buffer, terminating null byte included
    int word_length;
    // Number of strings that are generated and added to the trie
    int word_count;
 } FuzzyConfig;
 /**
 * Fill s with a random, null-terminated string consisting solely of
 * alphanumeric characters, ',' and '?'.
 *
 * The generated string has a random length between 0 and len - 2 characters.
 * A buffer of size 1 can only hold the empty string, and a non-positive len
 * leaves the buffer untouched.
 *
 * @param s buffer to write to; must provide room for at least len bytes
 * @param len size of the buffer in bytes, terminating null byte included
 */
 void random_clean_string(char* s, int len) {
    static const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,?";
    const int charset_len = (int)(sizeof(charset) - 1);
    // Nothing can be written into a buffer without any room
    if (len <= 0) {
        return;
    }
    // Only the terminator fits; this also avoids the modulo by zero below
    if (len == 1) {
        s[0] = '\0';
        return;
    }
    // len - 1 ensures that we can still set the null byte for the final byte
    int actual_len = rand() % (len - 1);
    int i;
    for (i = 0; i < actual_len; i++) {
        s[i] = charset[rand() % charset_len];
    }
    s[i] = '\0';
 }
 /**
 * Fill s with len - 1 random bytes followed by a terminating null byte.
 *
 * The first byte is never a null byte. The remaining random bytes may be null
 * bytes, so the resulting C string can be shorter than len - 1 characters.
 * With len == 1 the terminator overwrites the first byte and the result is
 * the empty string. A non-positive len leaves the buffer untouched.
 *
 * @param s buffer to write to; must provide room for at least len bytes
 * @param len size of the buffer in bytes, terminating null byte included
 */
 void random_string(char* s, int len) {
    // Without this guard both s[0] and s[len - 1] would be written out of
    // bounds
    if (len <= 0) {
        return;
    }
    // String can't be an empty string as they aren't supported
    s[0] = (char)(rand() % 255 + 1);
    for (int i = 1; i < len - 1; i++) {
        s[i] = (char)(rand() % 256);
    }
    // Just in case no null characters were created
    s[len - 1] = '\0';
 }
 /**
 * Overwrite each of the first count rows of s with a fresh random string.
 *
 * @param s array of at least count buffers, each at least len bytes large
 * @param count number of rows to fill
 * @param len size of each row in bytes, handed to random_string as-is
 */
 void random_string_matrix(char** s, int count, int len) {
    int row = 0;
    while (row < count) {
        random_string(s[row], len);
        row++;
    }
 }
 /**
 * Allocate count zero-initialised string buffers of len bytes each.
 *
 * The caller owns the result and has to free every row as well as the matrix
 * itself.
 *
 * @param count number of rows to allocate
 * @param len size of each row in bytes
 * @return the allocated matrix, or NULL if count or len is negative or an
 * allocation failed (nothing is leaked in that case)
 */
 char** init_string_matrix(int count, int len) {
    if (count < 0 || len < 0) {
        return NULL;
    }
    char** matrix = malloc((size_t)count * sizeof(char*));
    if (matrix == NULL) {
        return NULL;
    }
    for (int i = 0; i < count; i++) {
        matrix[i] = calloc((size_t)len, sizeof(char));
        // calloc is allowed to return NULL for a zero-sized request, so only a
        // NULL for a non-empty row counts as a failure
        if (matrix[i] == NULL && len > 0) {
            // Release the rows allocated so far before bailing out
            while (i-- > 0) {
                free(matrix[i]);
            }
            free(matrix);
            return NULL;
        }
    }
    return matrix;
 }
 /**
 * Test the trie implementation using randomly generated strings generated
 * using a given seed.
 *
 * All strings are added to a fresh trie, after which the size reported by the
 * trie is compared to the number of distinct strings. The removal checks are
 * currently disabled (see the commented-out code below).
 *
 * @param conf seed, buffer size per string and number of strings to test with
 * @return exit code describing failures, if any:
 *   0: success
 *   1: invalid add
 *   2: invalid remove (currently never returned)
 *   3: bad size after adds
 *   4: bad size after removes (currently never returned)
 */
 int fuzzy_test_trie_seed(FuzzyConfig conf) {
    srand(conf.seed);
    char** matrix = init_string_matrix(conf.word_count, conf.word_length);
    random_string_matrix(matrix, conf.word_count, conf.word_length);
    bool* contains = calloc(conf.word_count, sizeof(bool));
    // It's possible that the string matrix contains duplicate strings. Every
    // string therefore points at the flag of the first occurrence of its
    // value, so duplicates share a single "is in the trie" flag.
    bool** contains_dedupped = calloc(conf.word_count, sizeof(bool*));
    for (int i = 0; i < conf.word_count; i++) {
        if (contains_dedupped[i] == NULL) {
            contains_dedupped[i] = contains + i;
            for (int j = i + 1; j < conf.word_count; j++) {
                if (strcmp(matrix[i], matrix[j]) == 0) {
                    contains_dedupped[j] = contains + i;
                }
            }
        }
    }
    // We keep track of the size as well so that we can check whether this is
    // also correct
    size_t size = 0;
    Trie *ct;
    trie_init(&ct, NULL);
    TrieExitCode status;
    int exit_code = 0;
    // Add all strings to trie, checking for duplicates
    for (int i = 0; i < conf.word_count; i++) {
      status = trie_add(ct, matrix[i], NULL);
      // If the add succeeded, *contains_dedupped[i] should be false, as the
      // string cannot be in the trie yet. A successful add of a string that
      // was added before is an error.
      if (status == Ok && *contains_dedupped[i]) {
        exit_code = 1;
        goto END;
      }
      if (!*contains_dedupped[i]) {
        *contains_dedupped[i] = true;
        size++;
      }
    }
    // Ensure size is correct
    if (trie_size(ct) != size) {
      // Both values are printed as size_t; %i would not match their type
      printf("%zu %zu\n", (size_t)trie_size(ct), size);
      exit_code = 3;
      goto END;
    }
    // Remove all strings again, again taking duplicates into consideration
    /* for (int i = 0; i < conf.word_count; i++) { */
    /*   changed = remove_func(ct, matrix[i]); */
    /*   // The string shouldn't be in the trie, yet another add operation */
    /*   // says it added it as well */
    /*   if (changed != *contains_dedupped[i]) { */
    /*     exit_code = 2; */
    /*     goto END; */
    /*   } */
    /*   if (*contains_dedupped[i]) { */
    /*     *contains_dedupped[i] = false; */
    /*     size--; */
    /*   } */
    /* } */
    // Finally, check that the trie is completely empty
    /* if (size_func(ct) != 0) { */
    /*   exit_code = 4; */
    /* } */
 END:
    trie_free(ct);
    // Even testing functions should properly free memory
    free(contains);
    free(contains_dedupped);
    for (int i = 0; i < conf.word_count; i++) {
        free(matrix[i]);
    }
    free(matrix);
    return exit_code;
 }
 /**
 * Same as fuzzy_test_trie_seed, except that the seed is randomly generated.
 *
 * @param count how many strings to test with
 * @param len maximum length of each string
 * @param init_func function to create a new trie of the wanted type
 * @param free_func function to free the given trie
 * @param add_func function to add a string to the given trie
 * @param remove_func function to remove a string from the given trie
 * @param size_func function to get the size of the given trie
 * @return the generated seed if the test wasn't successful, -1 otherwise.
 */
 /* int fuzzy_test_trie(int count, int len, void* (*init_func) (), void (*free_func) (void*), bool (*add_func) (void*, char*), bool (*remove_func) (void*, char*), int (*size_func) (void*)) { */
 /*     int seed = rand(); */
 /*     bool succeeded = fuzzy_test_trie_seed(seed, count, len, init_func, free_func, add_func, remove_func, size_func); */
 /*     if (!succeeded) { */
 /*         return seed; */
 /*     } */
 /*     return -1; */
 /* } */
 #endif
--- a/trie/test/trie/test_trie.c
+++ b/trie/test/trie/test_trie.c
@ -0,0 +1,189 @@
 #include "test.h"
 #include "trie.h"
 #include "fuzzy.h"
 // Check that the trie ct reports exactly `size` entries, followed by a
 // TEST_MSG carrying the size that was actually reported. The macro expands to
 // two statements, so it must not be used as the body of an unbraced if/for.
 #define TEST_SIZE(ct, size) \
    TEST_CHECK(trie_size(ct) == size); \
    TEST_MSG("Size: %zu", trie_size(ct))
 // Declare a local `Trie *ct`, initialise it through trie_init (second
 // argument NULL) and check that a trie was produced. Introduces the name `ct`
 // into the enclosing scope, so it can be used once per function.
 # define TRIE_INIT() \
  Trie *ct; \
  trie_init(&ct, NULL); \
  TEST_CHECK(ct != NULL)
 // A freshly initialised trie has to report a size of zero.
 void test_init() {
  TRIE_INIT();
  TEST_SIZE(ct, 0);
  trie_free(ct);
 }
 // Add a single string and check that searching for it yields the very same
 // entry pointer, and that the trie reports a size of one.
 void test_add_one() {
  TRIE_INIT();
  Entry *entry = entry_new(Redirect, "");
  const char* string = "this is a test";
  TEST_CHECK(trie_add(ct, string, entry) == Ok);
  // Start from NULL so that a failed search leads to a well-defined mismatch
  // below instead of a read of an uninitialised pointer
  Entry *entry2 = NULL;
  TEST_CHECK(trie_search(ct, &entry2, string) == Ok);
  TEST_CHECK(entry == entry2);
  TEST_SIZE(ct, 1);
  trie_free(ct);
 }
 // Add a string followed by one of its proper prefixes and check that both can
 // be found again, each yielding its own entry.
 // NOTE(review): this test is not listed in TEST_LIST in this file - confirm
 // whether that is intentional.
 void test_add_prefix() {
  TRIE_INIT();
  const char *s1 = "halloween-2022";
  const char *s2 = "halloween-202";
  Entry *entry1 = entry_new(Redirect, "");
  Entry *entry2 = entry_new(Redirect, "");
  TEST_CHECK(trie_add(ct, s1, entry1) == Ok);
  TEST_CHECK(trie_add(ct, s2, entry2) == Ok);
  // Start from NULL so that a failed search leads to a well-defined mismatch
  Entry *entry3 = NULL;
  TEST_CHECK(trie_search(ct, &entry3, s1) == Ok);
  TEST_CHECK(entry3 == entry1);
  // Reset the output pointer (not the expected entry2) so that a stale result
  // of the first search cannot leak into the second comparison
  entry3 = NULL;
  TEST_CHECK(trie_search(ct, &entry3, s2) == Ok);
  TEST_CHECK(entry3 == entry2);
  trie_free(ct);
 }
 // Searching for a string that was never added has to report NotFound, even if
 // it shares a prefix with a string that is present.
 void test_search_not_present() {
  TRIE_INIT();
  TEST_CHECK(trie_add(ct, "this string exists", NULL) == Ok);
  // Only the status code is checked; entry is never read afterwards
  Entry *entry;
  TEST_CHECK(trie_search(ct, &entry, "this string does not exist") == NotFound);
  trie_free(ct);
 }
 // Add several strings sharing prefixes, check that each of them can be found
 // again with the entry it was added with, and that adding any of them a
 // second time is rejected with AlreadyPresent.
 void test_add_more() {
  TRIE_INIT();
  const char* one = "one";
  const char* two = "two";
  const char* twenty = "twenty";
  const char* twentytwo = "twentytwo";
  // The same entry is shared by all four strings
  Entry *entry = entry_new(Redirect, "");
  TEST_CHECK(trie_add(ct, one, entry) == Ok);
  TEST_CHECK(trie_add(ct, two, entry) == Ok);
  TEST_CHECK(trie_add(ct, twenty, entry) == Ok);
  TEST_CHECK(trie_add(ct, twentytwo, entry) == Ok);
  TEST_SIZE(ct, 4);
  // The output pointer starts at NULL and is reset before every search, so a
  // search that fails to write it cannot pass thanks to a stale or
  // uninitialised value
  Entry *entry2 = NULL;
  TEST_CHECK(trie_search(ct, &entry2, one) == Ok);
  TEST_CHECK(entry2 == entry);
  entry2 = NULL;
  TEST_CHECK(trie_search(ct, &entry2, two) == Ok);
  TEST_CHECK(entry2 == entry);
  entry2 = NULL;
  TEST_CHECK(trie_search(ct, &entry2, twenty) == Ok);
  TEST_CHECK(entry2 == entry);
  entry2 = NULL;
  TEST_CHECK(trie_search(ct, &entry2, twentytwo) == Ok);
  TEST_CHECK(entry2 == entry);
  entry2 = NULL;
  TEST_CHECK(trie_add(ct, one, NULL) == AlreadyPresent);
  TEST_CHECK(trie_add(ct, two, NULL) == AlreadyPresent);
  TEST_CHECK(trie_add(ct, twenty, NULL) == AlreadyPresent);
  TEST_CHECK(trie_add(ct, twentytwo, NULL) == AlreadyPresent);
  trie_free(ct);
 }
 /* void test_remove_one() { */
 /*     Trie* ct = trie_init(); */
 /*     TEST_CHECK(ct != NULL); */
 /*     const char* string = "this is a test"; */
 /*     TEST_CHECK(trie_add(ct, string, NULL)); */
 /*     TEST_SIZE(ct, 1); */
 /*     TEST_CHECK(trie_remove(ct, string)); */
 /*     TEST_SIZE(ct, 0); */
 /*     trie_free(ct); */
 /* } */
 /* void test_remove_more() { */
 /*     Trie* ct = trie_init(); */
 /*     TEST_CHECK(ct != NULL); */
 /*     const char* one = "one"; */
 /*     const char* two = "two"; */
 /*     const char* twenty = "twenty"; */
 /*     const char* twentytwo = "twentytwo"; */
 /*     TEST_CHECK(trie_add(ct, one, NULL)); */
 /*     TEST_CHECK(trie_add(ct, two, NULL)); */
 /*     TEST_CHECK(trie_add(ct, twenty, NULL)); */
 /*     TEST_CHECK(trie_add(ct, twentytwo, NULL)); */
 /*     TEST_SIZE(ct, 4); */
 /*     TEST_CHECK(trie_remove(ct, one)); */
 /*     TEST_CHECK(trie_remove(ct, two)); */
 /*     TEST_CHECK(trie_remove(ct, twenty)); */
 /*     TEST_CHECK(trie_remove(ct, twentytwo)); */
 /*     TEST_SIZE(ct, 0); */
 /*     trie_free(ct); */
 /* } */
 /* void test_remove_not_present() { */
 /*     Trie* ct = trie_init(); */
 /*     TEST_CHECK(ct != NULL); */
 /*     TEST_CHECK(trie_add(ct, "this string exists", NULL)); */
 /*     TEST_CHECK(!trie_remove(ct, "this string does not exist")); */
 /*     trie_free(ct); */
 /* } */
 // Re-run configurations that were found to fail before, so we don't get
 // regressions. Every run re-seeds the generator with the stored seed, which
 // makes each configuration fully deterministic.
 void test_fuzzy_set() {
  // { seed, word_length, word_count }
  FuzzyConfig configs[] = {
    { 403318210, 5, 500},
    { 588218406, 16, 460},
    { 297512224, 21, 500}
  };
  // Derive the element count from the array itself so that it stays correct
  // if the element type ever changes
  int count = sizeof(configs) / sizeof(configs[0]);
  int res;
  for (int i = 0; i < count; i++) {
    res = fuzzy_test_trie_seed(configs[i]);
    TEST_CHECK_(res == 0,
                "Failed config, seed = %i, len = %i, count = %i, code=%i", configs[i].seed, configs[i].word_length, configs[i].word_count, res);
  }
 }
 // Tests registered with the test runner as { name, function } pairs; the list
 // ends with a { NULL, NULL } entry.
 // NOTE(review): test_add_prefix is defined in this file but is not registered
 // here - confirm whether that is intentional.
 TEST_LIST = {
        {"trie init",test_init },
        { "trie add one",test_add_one },
        { "trie add more",test_add_more },
        { "trie search not present",test_search_not_present},
        /* { "trie remove one",test_remove_one }, */
        /* { "trie remove more",test_remove_more }, */
        /* { "trie remove not present",test_remove_not_present}, */
        { "trie fuzzy set", test_fuzzy_set },
        { NULL, NULL}
 };
--- a/trie/test/trie/test_trie_fuzzy.c
+++ b/trie/test/trie/test_trie_fuzzy.c
@ -0,0 +1,34 @@
 #include "test.h"
 #include "trie.h"
 #include "fuzzy.h"
 // Run the fuzzy trie test for a grid of word lengths (1, 6, 11, 16, 21) and
 // word counts (10 up to 500 in steps of 10), using 50 random seeds per
 // combination. A failing configuration is reported together with its seed, so
 // it can be reproduced.
 void test_fuzzy() {
    // Randomize seed
    // NOTE(review): time() needs <time.h>, which this file does not include
    // itself - presumably it arrives through test.h; confirm.
    srand(time(NULL));
    FuzzyConfig config;
    int counter = 0;
    int res;
    // NOTE(review): with len == 1 random_string overwrites its only byte with
    // the terminator, so this iteration only generates empty strings, which
    // random_string's own comment calls unsupported - confirm this is wanted.
    for (int len = 1; len < 25; len += 5) {
      for (int count = 10; count <= 500; count += 10) {
        for (int i = 0; i < 50; i++) {
          counter++;
          // fuzzy_test_trie_seed re-seeds the generator with this value, so
          // the next seed is drawn from the sequence started by this one
          config.seed = rand();
          config.word_length = len;
          config.word_count = count;
 res = fuzzy_test_trie_seed(config);
    TEST_CHECK_(res == 0,
                "Failed config, seed = %i, len = %i, count = %i, code = %i", config.seed, config.word_length, config.word_count, res);
        }
      }
    }
    TEST_MSG("fuzzy tests done = %i", counter);
 }
 // Tests registered with the test runner as { name, function } pairs; the list
 // ends with a { NULL, NULL } entry.
 TEST_LIST = {
  { "customtrie fuzzy", test_fuzzy },
  { NULL, NULL}
 };