diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4a2efbe..5122f34 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,10 +1,15 @@
-cmake_minimum_required(VERSION 3.24)
+cmake_minimum_required(VERSION 3.20)
 project(lander C CXX)
 
 set(CMAKE_C_STANDARD 17)
 
 add_subdirectory(crow)
-include_directories(crow/include)
+add_subdirectory(tries)
+include_directories(crow/include tries/include)
+
+if(CMAKE_BUILD_TYPE STREQUAL Release)
+  add_compile_options(-O3 -flto)
+endif()
 
 add_executable(lander src/main.cpp)
-target_link_libraries(lander PUBLIC Crow)
+target_link_libraries(lander PUBLIC Crow ternarytrie)
diff --git a/Makefile b/Makefile
index 10a9384..3db982f 100644
--- a/Makefile
+++ b/Makefile
@@ -20,6 +20,15 @@ $(BUILD_DIR)/Debug/Makefile: CMakeLists.txt
 build: cmake
 	@ make -C '$(BUILD_DIR)/Debug'
 
+.PHONY: cmake-release
+cmake-release: $(BUILD_DIR)/Release/Makefile
+$(BUILD_DIR)/Release/Makefile: CMakeLists.txt
+	@ cmake -B'$(BUILD_DIR)/Release' -DCMAKE_BUILD_TYPE=Release .
+
+.PHONY: prod
+prod: cmake-release
+	@ make -C '$(BUILD_DIR)/Release'
+
 .PHONY: run
 run: build
 	@ ./build/Debug/lander
diff --git a/tries/CMakeLists.txt b/tries/CMakeLists.txt
new file mode 100644
index 0000000..08f1a94
--- /dev/null
+++ b/tries/CMakeLists.txt
@@ -0,0 +1,8 @@
+cmake_minimum_required(VERSION 3.20)
+project(AD3-project-2022-2023 C)
+
+set(CMAKE_C_STANDARD 17)
+
+include_directories(include)
+
+add_library(ternarytrie STATIC include/ternarytrie.h src/ternarytrie.c)
diff --git a/tries/include/ternarytrie.h b/tries/include/ternarytrie.h
new file mode 100644
index 0000000..f33dc23
--- /dev/null
+++ b/tries/include/ternarytrie.h
@@ -0,0 +1,72 @@
+#ifndef AD3_TERNARYTRIE
+#define AD3_TERNARYTRIE
+
+/**
+ * The implementation of a Ternary Trie.
+ *
+ * Each node should be represented by a binary tree in order to reduce the memory usage.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/**
+ * Type definition for the struct representing the current Trie.
+ *
+ * You can (and should) redefine this in your c-file with the concrete fields.
+ */
+typedef struct ttrie TernaryTrie;
+
+/**
+ * Allocate and initialize an empty Trie.
+ *
+ * @return a pointer to an empty Trie struct
+ */
+TernaryTrie* ternarytrie_init();
+
+/**
+ * De-allocate a trie by freeing the memory occupied by this trie.
+ *
+ * @param trie which should be freed
+ */
+void ternarytrie_free(TernaryTrie* trie);
+
+/**
+ * Search whether a string is contained in this trie.
+ *
+ * @param trie trie to search in
+ * @param string NUL-terminated string to look for
+ * @return true if the string is contained within this trie, false otherwise
+ */
+bool ternarytrie_search(TernaryTrie* trie, const char* string);
+
+/**
+ * Add a string to this trie.
+ *
+ * @param trie trie to add the string to
+ * @param string NUL-terminated string to add
+ * @return true if the trie was changed by this operation, false if it was already present
+ */
+bool ternarytrie_add(TernaryTrie* trie, const char* string);
+
+/**
+ * Remove a string from this trie.
+ *
+ * Note: strings added to this trie are considered to be "owned" by the caller.
+ * Removing the string from the trie should not free the string's memory.
+ *
+ * @param trie trie to remove the string from
+ * @param string NUL-terminated string to remove
+ * @return true if the string was present and has been removed, false if it was not present
+ */
+bool ternarytrie_remove(TernaryTrie* trie, const char* string);
+
+/**
+ * Returns the number of strings in this trie.
+ *
+ * @param trie trie to inspect
+ * @return the number of strings in this trie
+ */
+size_t ternarytrie_size(TernaryTrie* trie);
+
+#endif //AD3_TERNARYTRIE
diff --git a/tries/src/common.c b/tries/src/common.c
new file mode 100644
index 0000000..2b3a9fe
--- /dev/null
+++ b/tries/src/common.c
@@ -0,0 +1,42 @@
+#define ALPHABET_SIZE 256
+#define DELIMITER '\0'
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * Own implementation of strdup, heavily inspired by the glibc source code.
+ *
+ * This is necessary because subGIT does not seem to have a strdup
+ * implementation available for use.
+ *
+ * @param s string to duplicate
+ * @return pointer to the newly allocated string, or NULL if malloc fails
+ */
+char *my_strdup(const char *s) {
+  size_t len = strlen(s);
+  char *new = malloc(len + 1);
+
+  // Copying len + 1 bytes brings the terminating DELIMITER along
+  return (new == NULL) ? NULL : (char *)memcpy(new, s, len + 1);
+}
+
+/**
+ * Own implementation of strndup, heavily inspired by the glibc source code.
+ *
+ * This is necessary because subGIT does not seem to provide strndup.
+ *
+ * @param s string to duplicate
+ * @param n maximum number of characters to copy from s
+ * @return pointer to the newly allocated string, or NULL if malloc fails
+ */
+char *my_strndup(const char *s, size_t n) {
+  size_t string_len = strlen(s);
+  size_t len = (string_len < n) ? string_len : n; // never read past end of s
+  char *new = (char *)malloc(len + 1);
+
+  if (new == NULL) return NULL;
+  new[len] = DELIMITER;
+  return (char *)memcpy(new, s, len);
+}
diff --git a/tries/src/ternarytrie.c b/tries/src/ternarytrie.c
new file mode 100644
index 0000000..6578527
--- /dev/null
+++ b/tries/src/ternarytrie.c
@@ -0,0 +1,247 @@
+#include "ternarytrie.h"
+#include "ternarytrie_node.c"
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef struct ttrie {
+  TernaryTrieNode *root;
+  size_t size;
+} TernaryTrie;
+
+/**
+ * Allocate and initialize an empty TernaryTrie
+ *
+ * @return pointer to the empty TernaryTrie
+ */
+inline TernaryTrie *ternarytrie_init() {
+  TernaryTrie *node = calloc(1, sizeof(TernaryTrie));
+  node->root = ttnode_init();
+
+  return node;
+}
+
+/**
+ * De-allocate a TernaryTrie by freeing its entire underlying structure.
+ *
+ * @param trie trie to free
+ */
+inline void ternarytrie_free(TernaryTrie *trie) {
+  ttnode_free(trie->root);
+  free(trie);
+}
+
+typedef struct searchresult {
+  TernaryTrieNode *parent; // node linking to child; NULL if child is the root
+  TernaryTrieNode *child;  // node representing the string; NULL if not found
+} SearchResult;
+
+SearchResult ternarytrie_search_node(TernaryTrie *trie, const char *string) {
+  SearchResult out = {NULL, NULL}; // stays all-NULL when string is absent
+
+  // Edge case: the empty string is represented by the root having type 1
+  if (string[0] == DELIMITER) {
+    if (trie->root->type == 1) {
+      out.child = trie->root;
+    }
+
+    return out;
+  }
+
+  size_t i = 0;
+  TernaryTrieNode **node_ptr = &(trie->root);
+  TernaryTrieNode **child_ptr;
+
+  do {
+    child_ptr = ttnode_search(*node_ptr, string[i], false);
+
+    // NULL child_ptr: the character isn't in this node's binary tree.
+    // NULL *child_ptr: no node hangs off that character. Both mean "absent".
+    if (child_ptr == NULL || *child_ptr == NULL) {
+      return out;
+    }
+
+    i++;
+
+    if (string[i] == DELIMITER || (*child_ptr)->type == 2) {
+      break;
+    }
+
+    node_ptr = child_ptr;
+  } while (1);
+
+  if ((*child_ptr)->type == 2) {
+    if (string[i] != DELIMITER &&
+        strcmp(string + i, (*child_ptr)->ptr.string) == 0) {
+      out.child = *child_ptr;
+      out.parent = *node_ptr;
+    }
+  }
+  // Here we know we've traversed through the entire string and have arrived at
+  // a node that isn't a full leaf
+  else if ((*child_ptr)->type == 1) {
+    out.child = *child_ptr;
+    out.parent = *node_ptr;
+  }
+
+  return out;
+}
+
+/**
+ * Returns whether the given string is present in the trie.
+ *
+ * @param trie trie to look in
+ * @param string string to look up
+ * @return true if the string is present in the trie, false otherwise
+ */
+inline bool ternarytrie_search(TernaryTrie *trie, const char *string) {
+  SearchResult res = ternarytrie_search_node(trie, string);
+
+  return res.child != NULL;
+}
+
+/**
+ * Add the given string to the TernaryTrie.
+ *
+ * @param trie trie to add string to
+ * @param string string to add
+ * @return true if the string wasn't present in the trie and thus added, false
+ * otherwise
+ */
+bool ternarytrie_add(TernaryTrie *trie, const char *string) {
+  // Edge case for empty string
+  if (string[0] == DELIMITER) {
+    if (trie->root->type == 0) {
+      trie->root->type = 1;
+      trie->size++;
+
+      return true;
+    }
+
+    return false;
+  }
+
+  size_t i = 0;
+  TernaryTrieNode **node_ptr = &(trie->root);
+  TernaryTrieNode **new_node_ptr;
+
+  do {
+    new_node_ptr = ttnode_search(*node_ptr, string[i], true);
+
+    // ttnode_search will only return NULL with create true if the node to look
+    // in represents a full leaf. Therefore, we split the node and restart the
+    // iteration.
+    if (new_node_ptr == NULL) {
+      // It's possible we've ended up in the full leaf node that represents this
+      // string
+      if (strcmp(string + i, (*node_ptr)->ptr.string) == 0) {
+        return false;
+      }
+
+      ttnode_split(*node_ptr);
+      continue;
+    }
+
+    node_ptr = new_node_ptr;
+
+    // The search function has added the character to the node
+    i++;
+
+    // The next node in the string's path doesn't exist yet, so we add it to the
+    // trie
+    if (*node_ptr == NULL) {
+      TernaryTrieNode *new_node = ttnode_init();
+
+      // If there's a remaining part of the string, we add it to the leaf
+      if (string[i] != DELIMITER) {
+        ttnode_set_string(new_node, string + i);
+      } else {
+        new_node->type = 1;
+      }
+
+      *node_ptr = new_node;
+
+      trie->size++;
+
+      return true;
+    }
+  } while (string[i] != DELIMITER);
+
+  // If we've arrived here, we've traversed through the entire string and have
+  // arrived at a node that already exists.
+
+  // The existing node is a full leaf, so we split it and make it
+  // represent our new string.
+  if ((*node_ptr)->type == 2) {
+    ttnode_split(*node_ptr);
+  }
+  // The string is already in the trie
+  else if ((*node_ptr)->type == 1) {
+    return false;
+  }
+
+  (*node_ptr)->type = 1;
+
+  trie->size++;
+
+  return true;
+}
+
+/**
+ * Remove the given string from a TernaryTrie.
+ *
+ * @param trie trie to remove string from
+ * @param string string to remove
+ * @return true if the string was in the trie and thus removed, false otherwise
+ */
+bool ternarytrie_remove(TernaryTrie *trie, const char *string) {
+  SearchResult res = ternarytrie_search_node(trie, string);
+
+  if (res.child == NULL) {
+    return false;
+  }
+
+  trie->size--;
+
+  if (res.parent != NULL) {
+    // We're removing a full leaf, so we calculate the offset of the character
+    // to remove from the parent
+    if (res.child->type == 2) {
+      size_t str_len = strlen(string);
+      size_t suffix_len = strlen(res.child->ptr.string);
+
+      ttnode_remove(res.parent, string[str_len - suffix_len - 1]);
+    }
+    // In the other case, the character to remove from the parent is the last
+    // character of the string
+    else if (res.child->size == 0) {
+      size_t i = 0;
+
+      while (string[i + 1] != DELIMITER) {
+        i++;
+      }
+
+      ttnode_remove(res.parent, string[i]);
+    } else {
+      res.child->type = 0;
+
+      return true;
+    }
+
+    ttnode_free(res.child);
+  }
+  // We're in the root here
+  else {
+    res.child->type = 0;
+  }
+
+  return true;
+}
+
+/**
+ * Return the current size of the given trie.
+ *
+ * @param trie trie to return size for
+ * @return size of the trie
+ */
+inline size_t ternarytrie_size(TernaryTrie *trie) { return trie->size; }
diff --git a/tries/src/ternarytrie_node.c b/tries/src/ternarytrie_node.c
new file mode 100644
index 0000000..f101a3a
--- /dev/null
+++ b/tries/src/ternarytrie_node.c
@@ -0,0 +1,312 @@
+#include "common.c"
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+/**
+ * Represents a node of the binary tree contained within each non-leaf
+ * TernaryTrieNode.
+ */
+typedef struct ttinode {
+  struct ttinode *left;
+  struct ttinode *right;
+  struct ttnode *next;
+  char key;
+} TernaryTrieInnerNode;
+
+/**
+ * Represents a node inside a TernaryTrie. A node can be in one of three states:
+ * - Internal node: a node that's part of a path to a leaf node. This node will
+ *   always have a size of at least one, and an initialized root.
+ * - Leaf: a node solely used to represent a string ending there. Its size is 0,
+ *   its ptr is unused and its type is 1.
+ * - Full leaf: a leaf node that contains a string. This occurs when a string is
+ *   added whose path is not fully in the tree yet, causing its remaining suffix
+ *   to be stored as a single node. Its type is 2, its string pointer is
+ *   initialized, and size holds the string length truncated to 8 bits.
+ */
+typedef struct ttnode {
+  union {
+    TernaryTrieInnerNode *root;
+    char *string;
+  } ptr;
+  // What type of node this is
+  // 0: regular non-representing node
+  // 1: regular representing node
+  // 2: full leaf
+  uint8_t type;
+  // Dependent on type
+  // 0, 1: size of underlying binary tree
+  // 2: length of string
+  uint8_t size;
+} TernaryTrieNode;
+
+// Required for recursively freeing tree structure
+void ttnode_free(TernaryTrieNode *node);
+
+/**
+ * Allocate and initialize a new TernaryTrieInnerNode representing a given
+ * character. Declared static: a plain C inline emits no external definition.
+ *
+ * @param c character to represent
+ * @return pointer to newly allocated struct
+ */
+static inline TernaryTrieInnerNode *ttinode_init(char c) {
+  TernaryTrieInnerNode *node = calloc(1, sizeof(TernaryTrieInnerNode));
+  node->key = c;
+
+  return node;
+}
+
+/**
+ * Allocate and initialize a new, zeroed TernaryTrieNode (type 0, size 0).
+ *
+ * @return pointer to newly allocated struct
+ */
+static inline TernaryTrieNode *ttnode_init() { return calloc(1, sizeof(TernaryTrieNode)); }
+
+/**
+ * Free a TernaryTrieInnerNode and its underlying tree structure. This should
+ * usually only be called on the root of a binary tree to free the entire
+ * structure.
+ *
+ * @param node node whose tree to free
+ */
+void ttinode_free_cascade(TernaryTrieInnerNode *node) {
+  if (node->left != NULL) {
+    ttinode_free_cascade(node->left);
+  }
+
+  if (node->right != NULL) {
+    ttinode_free_cascade(node->right);
+  }
+
+  if (node->next != NULL) {
+    ttnode_free(node->next);
+  }
+
+  free(node);
+}
+
+/**
+ * Free a TernaryTrieNode and its underlying tree structure.
+ *
+ * @param node node to free
+ */
+void ttnode_free(TernaryTrieNode *node) {
+  if (node->type == 2) {
+    free(node->ptr.string);
+  } else if (node->size != 0) {
+    ttinode_free_cascade(node->ptr.root);
+  }
+
+  free(node);
+}
+
+/**
+ * Add a copy of the string to the given node & set its type accordingly.
+ *
+ * @param node node to add string to
+ * @param string string to add
+ */
+static inline void ttnode_set_string(TernaryTrieNode *node, const char *string) {
+  node->type = 2;
+  node->size = strlen(string);
+  node->ptr.string = my_strdup(string);
+}
+
+/**
+ * This function performs a lookup in the underlying binary tree of the given
+ * TernaryTrieNode. If found, the return value is a pointer to the memory
+ * location where the TernaryTrieInnerNode representing the given character
+ * stores its `next` field. If not found, the return value is NULL, unless
+ * `create` is true.
+ *
+ * NOTE: a non-NULL return value does not mean that the dereferenced value is
+ * also not NULL. In particular, if `create` is set to true and the function had
+ * to create the new node, the dereferenced value will always be NULL.
+ *
+ * @param node node to perform lookup in. If node is a full leaf, the return
+ * value will always be NULL, regardless of the value of create.
+ * @param c character to look up in node's binary tree
+ * @param create whether to create the TernaryTrieInnerNode if it isn't present
+ * yet. If true, NULL is only returned when node is a full leaf.
+ * @return address of the matching `next` field, or NULL as described above
+ */
+TernaryTrieNode **ttnode_search(TernaryTrieNode *node, const char c,
+                                bool create) {
+  // Full leaves hold a string instead of a binary tree, so always return NULL
+  if (node->type == 2) {
+    return NULL;
+  }
+
+  // Size 0 means no binary tree yet: create its root, or report "not found"
+  if (node->size == 0) {
+    if (create) {
+      node->size++;
+      node->ptr.root = ttinode_init(c);
+
+      return &node->ptr.root->next;
+    }
+
+    return NULL;
+  }
+
+  TernaryTrieInnerNode *parent = node->ptr.root;
+  TernaryTrieInnerNode *child;
+
+  // Iterate through the tree until we either find the character or realize it's
+  // not present in the tree
+  // FIXME don't use while (1)
+  while (1) {
+    if (parent->key == c) {
+      return &parent->next;
+    } else if (c < parent->key) {
+      child = parent->left;
+    } else {
+      child = parent->right;
+    }
+
+    if (child == NULL) {
+      break;
+    }
+
+    parent = child;
+  };
+
+  // child is NULL: c isn't in the tree and parent is the last node visited.
+
+  // If create is true, we create the new node so that we can still return a
+  // non-NULL pointer.
+  if (create) {
+    TernaryTrieInnerNode *new_node = ttinode_init(c);
+
+    if (c < parent->key) {
+      parent->left = new_node;
+    } else {
+      parent->right = new_node;
+    }
+
+    node->size++;
+
+    return &new_node->next;
+  }
+
+  return NULL;
+}
+
+/**
+ * Split a remaining string leaf node in two. This function assumes it receives
+ * a full leaf as its input.
+ *
+ * @param node node to split
+ */
+void ttnode_split(TernaryTrieNode *node) {
+  TernaryTrieNode *new_node = ttnode_init();
+  char key = node->ptr.string[0];
+
+  // There's a chance the remaining string was only 1 character, meaning the new
+  // node doesn't have to store a string
+  if (node->ptr.string[1] != DELIMITER) {
+    ttnode_set_string(new_node, node->ptr.string + 1);
+  } else {
+    new_node->type = 1;
+  }
+
+  node->type = 0;
+  node->size = 0;
+
+  free(node->ptr.string);
+  node->ptr.string = NULL;
+
+  // Initialize node's binary tree with the correct character
+  TernaryTrieNode **node_ptr = ttnode_search(node, key, true);
+  *node_ptr = new_node;
+}
+
+/**
+ * Remove the given character from a TernaryTrieInnerNode's subtree. The
+ * function assumes the character is indeed in the subtree.
+ */
+void ttinode_remove(TernaryTrieInnerNode *node, const char c) {
+  TernaryTrieInnerNode **to_remove_ptr = &node;
+
+  // We use pointers to pointers here so we can later free the removed node
+  // without having to know what its parent is
+  while ((*to_remove_ptr)->key != c) {
+    to_remove_ptr = (c < (*to_remove_ptr)->key) ? &(*to_remove_ptr)->left
+                                                : &(*to_remove_ptr)->right;
+  };
+
+  // If the node isn't a leaf, we have to replace it with another
+  if ((*to_remove_ptr)->left != NULL || (*to_remove_ptr)->right != NULL) {
+    TernaryTrieInnerNode *to_replace = *to_remove_ptr;
+
+    // Replace with its only right child
+    if (to_replace->left == NULL) {
+      TernaryTrieInnerNode *to_remove = to_replace->right;
+
+      to_replace->key = to_remove->key;
+      to_replace->next = to_remove->next;
+      to_replace->left = to_remove->left;
+      to_replace->right = to_remove->right;
+
+      free(to_remove);
+    }
+    // Replace with its only left child
+    else if (to_replace->right == NULL) {
+      TernaryTrieInnerNode *to_remove = to_replace->left;
+
+      to_replace->key = to_remove->key;
+      to_replace->next = to_remove->next;
+      to_replace->left = to_remove->left;
+      to_replace->right = to_remove->right;
+
+      free(to_remove);
+    }
+    // Node has two children, so replace with successor
+    else {
+      TernaryTrieInnerNode *to_remove_parent = to_replace;
+      TernaryTrieInnerNode *to_remove = to_replace->right;
+
+      while (to_remove->left != NULL) {
+        to_remove_parent = to_remove;
+        to_remove = to_remove->left;
+      }
+
+      to_replace->key = to_remove->key;
+      to_replace->next = to_remove->next;
+
+      if (to_remove_parent != to_replace) {
+        to_remove_parent->left = to_remove->right;
+      } else {
+        to_remove_parent->right = to_remove->right;
+      }
+
+      free(to_remove);
+    }
+  }
+  // We're the leaf, so we free ourselves
+  else {
+    free(*to_remove_ptr);
+    *to_remove_ptr = NULL;
+  }
+}
+
+/**
+ * Remove the given character from a TernaryTrieNode, respecting the rules
+ * of a binary search tree. This function assumes the character is in the search
+ * tree.
+ *
+ * @param node node to remove character from
+ * @param c character to remove
+ */
+static inline void ttnode_remove(TernaryTrieNode *node, const char c) {
+  ttinode_remove(node->ptr.root, c);
+
+  node->size--;
+  // An emptied tree means the root itself was freed above; drop the pointer
+  if (node->size == 0) {
+    node->ptr.root = NULL;
+  }
+}