chore: remove old trie library
							parent
							
								
									d8f015f923
								
							
						
					
					
						commit
						62ac53cef6
					
				|  | @ -7,7 +7,7 @@ SRC_DIR        = src | |||
| TEST_DIR       = test | ||||
| THIRDPARTY_DIR = thirdparty | ||||
| 
 | ||||
| INC_DIRS  = include $(THIRDPARTY_DIR)/include trie/include lsm/include | ||||
| INC_DIRS  = include $(THIRDPARTY_DIR)/include lsm/include | ||||
| LIBS      = m lsm | ||||
| LIB_DIRS  = ./lsm/build | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,95 +0,0 @@ | |||
| # https://spin.atomicobject.com/2016/08/26/makefile-c-projects/ was a great
 | ||||
| # base for this Makefile
 | ||||
| 
 | ||||
| -include config.mk | ||||
| 
 | ||||
| LIB := $(BUILD_DIR)/$(LIB_FILENAME) | ||||
| 
 | ||||
| SRCS != find '$(SRC_DIR)' -iname '*.c' | ||||
| SRCS_H != find $(INC_DIRS) -iname '*.h' | ||||
| SRCS_H_INTERNAL != find $(SRC_DIR) -iname '*.h' | ||||
| SRCS_TEST != find '$(TEST_DIR)' -iname '*.c' | ||||
| 
 | ||||
| OBJS := $(SRCS:%=$(BUILD_DIR)/%.o) | ||||
| OBJS_TEST := $(SRCS_TEST:%=$(BUILD_DIR)/%.o) | ||||
| DEPS := $(SRCS:%=$(BUILD_DIR)/%.d) $(SRCS_TEST:%=$(BUILD_DIR)/%.d) | ||||
| 
 | ||||
| BINS_TEST := $(OBJS_TEST:%.c.o=%) | ||||
| TARGETS_TEST := $(BINS_TEST:%=test-%) | ||||
| TARGETS_MEM_TEST := $(BINS_TEST:%=test-mem-%) | ||||
| 
 | ||||
| _CFLAGS := $(addprefix -I,$(INC_DIRS)) $(CFLAGS) -Wall -Wextra | ||||
| 
 | ||||
| .PHONY: all | ||||
| all: lib | ||||
| 
 | ||||
| 
 | ||||
| # =====COMPILATION=====
 | ||||
| # Utility used by the CI to lint
 | ||||
| .PHONY: objs | ||||
| objs: $(OBJS) | ||||
| 
 | ||||
| .PHONY: lib | ||||
| lib: $(LIB) | ||||
| $(LIB): $(OBJS) | ||||
| 	ar -rcs $@ $(OBJS) | ||||
| 
 | ||||
| $(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c | ||||
| 	mkdir -p $(dir $@) | ||||
| 	$(CC) -c $(_CFLAGS) $< -o $@ | ||||
| 
 | ||||
| 
 | ||||
| # =====TESTING=====
 | ||||
| .PHONY: test | ||||
| test: $(TARGETS_TEST) | ||||
| 
 | ||||
| .PHONY: test-mem | ||||
| test-mem: $(TARGETS_MEM_TEST) | ||||
| 
 | ||||
| .PHONY: $(TARGETS_TEST) | ||||
| $(TARGETS_TEST): test-%: % | ||||
| 	./$^ | ||||
| 
 | ||||
| .PHONY: $(TARGETS_MEM_TEST) | ||||
| $(TARGETS_MEM_TEST): test-mem-%: % | ||||
| 	valgrind --tool=memcheck --error-exitcode=1 --track-origins=yes --leak-check=full ./$^ | ||||
| 
 | ||||
| .PHONY: build-test | ||||
| build-test: $(BINS_TEST) | ||||
| 
 | ||||
| $(BINS_TEST): %: %.c.o $(LIB) | ||||
| 	$(CC) \
 | ||||
| 		$^ -o $@ | ||||
| 
 | ||||
| # Along with the include directory, each test includes $(TEST_DIR) (which
 | ||||
| # contains the acutest.h header file), and the src directory of the module it's
 | ||||
| # testing. This allows tests to access internal methods, which aren't publicly
 | ||||
| # exposed.
 | ||||
| $(BUILD_DIR)/$(TEST_DIR)/%.c.o: $(TEST_DIR)/%.c | ||||
| 	mkdir -p $(dir $@) | ||||
| 	$(CC) $(_CFLAGS) -I$(TEST_DIR) \
 | ||||
| 		-I$(dir $(@:$(BUILD_DIR)/$(TEST_DIR)/%=$(SRC_DIR)/%)) \
 | ||||
| 		-c $< -o $@ | ||||
| 
 | ||||
| # =====MAINTENANCE=====
 | ||||
| .PHONY: lint | ||||
| lint: | ||||
| 	clang-format -n --Werror $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) | ||||
| 
 | ||||
| .PHONY: fmt | ||||
| fmt: | ||||
| 	clang-format -i $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL) | ||||
| 
 | ||||
| .PHONY: clean | ||||
| clean: | ||||
| 	rm -rf $(BUILD_DIR) | ||||
| 
 | ||||
| 
 | ||||
# Regenerate compile_commands.json with bear by rebuilding from scratch.
# $(MAKE) is used instead of a literal `make` so the recursive invocations use
# the same make program (and inherit its flags) as the parent.
.PHONY: bear
bear: clean
	bear -- $(MAKE)
	bear --append -- $(MAKE) build-test
| 
 | ||||
| 
 | ||||
| # Make make aware of the .d files
 | ||||
| -include $(DEPS) | ||||
|  | @ -1,16 +0,0 @@ | |||
| # Trie design | ||||
| 
 | ||||
| The underlying data structure is based on a combination of a ternary and a | ||||
| Patricia trie. | ||||
| 
 | ||||
| * Nodes are classic ternary trie nodes, meaning each node contains a binary | ||||
|   search tree | ||||
| * Each node can define a skip, like a Patricia trie, of at most 8 characters. | ||||
|   These skipped characters are stored directly in the structs defining the | ||||
|   nodes. | ||||
| * While the add function relies on the fact that the input is a NULL-terminated | ||||
|   C string, the trie itself does not store any NULL bytes. | ||||
| 
 | ||||
| The goal of this data structure is to be as optimized as possible for search | ||||
| operations with short (usually < 8 characters) keys, as this is by far the most | ||||
| common operation for a URL shortener/pastebin. | ||||
|  | @ -1,13 +0,0 @@ | |||
| LIB_FILENAME = libtrie.a | ||||
| 
 | ||||
| BUILD_DIR = build | ||||
| SRC_DIR = src | ||||
| TEST_DIR = test | ||||
| INC_DIRS = include | ||||
| 
 | ||||
| # -MMD: generate a .d file for every source file. This file can be imported by
 | ||||
| #  make and makes make aware that a header file has been changed, ensuring an
 | ||||
| #  object file is also recompiled if only a header is changed.
 | ||||
| # -MP: generate a dummy target for every header file (according to the  docs it
 | ||||
| #  prevents some errors when removing header files)
 | ||||
| CFLAGS = -MMD -MP -g | ||||
|  | @ -1,148 +0,0 @@ | |||
| #ifndef AD3_TERNARYTRIE | ||||
| #define AD3_TERNARYTRIE | ||||
| 
 | ||||
| #define ALPHABET_SIZE 256 | ||||
| #define DELIMITER '\0' | ||||
| #define MAX(x, y) (((x) > (y)) ? (x) : (y)) | ||||
| 
 | ||||
| // Should not be higher than 254 or stuff will break
 | ||||
| #define TRIE_MAX_SKIP_SIZE 8 | ||||
| 
 | ||||
| /**
 | ||||
|  * The implementation of a Ternary Trie. | ||||
|  * | ||||
|  * Each node should be represented by a binary tree in order to reduce the | ||||
|  * memory usage. | ||||
|  */ | ||||
| 
 | ||||
| #include <stdbool.h> | ||||
| #include <stddef.h> | ||||
| #include <string.h> | ||||
| 
 | ||||
| static const char charset[] = | ||||
|     "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; | ||||
| static const size_t charset_len = sizeof(charset) - 1; | ||||
| 
 | ||||
| // Length of randomly generated keys
 | ||||
| #define RANDOM_KEY_LENGTH_SHORT 4 | ||||
| #define RANDOM_KEY_LENGTH_LONG 16 | ||||
| 
 | ||||
| /**
 | ||||
|  * Type definition for the struct representing the current Trie. | ||||
|  * | ||||
|  * You can (and should) redefine this in your c-file with the concrete fields. | ||||
|  */ | ||||
| typedef struct ttrie Trie; | ||||
| 
 | ||||
| typedef enum entry_type { Redirect, Paste, Unknown } EntryType; | ||||
| 
 | ||||
| typedef struct entry { | ||||
|   EntryType type; | ||||
|   char *string; | ||||
| } Entry; | ||||
| 
 | ||||
| typedef enum trie_exit_code { | ||||
|   Ok = 0, | ||||
|   NotFound, | ||||
|   AlreadyPresent, | ||||
|   FileError | ||||
| } TrieExitCode; | ||||
| 
 | ||||
| Entry *entry_new(EntryType type, const char *string); | ||||
| 
 | ||||
| /**
 | ||||
|  * Allocate & initialize a new trie, and populate it with the data from the | ||||
|  * given data file. | ||||
|  * | ||||
|  * @return 0 if everything was successful, non-zero otherwise | ||||
|  */ | ||||
| TrieExitCode trie_init(Trie **trie_ptr, const char *file_path); | ||||
| 
 | ||||
| /**
 | ||||
|  * De-allocate a trie by freeing the memory occupied by this trie. | ||||
|  * | ||||
|  * @param trie which should be freed | ||||
|  */ | ||||
| void trie_free(Trie *trie); | ||||
| 
 | ||||
| /**
 | ||||
|  * Search for an entry in the trie. | ||||
|  * | ||||
|  * @param trie | ||||
|  * @param entry_ptr pointer to Entry will be stored here, if found | ||||
|  * @param key key representing the entry | ||||
|  * @return 0 if the search was successful, 1 if not found | ||||
|  */ | ||||
| TrieExitCode trie_search(Trie *trie, Entry **entry_ptr, const char *key); | ||||
| 
 | ||||
| TrieExitCode trie_search_len(Trie *trie, Entry **entry_ptr, const char *key, | ||||
|                              size_t key_len); | ||||
| 
 | ||||
| /**
 | ||||
|  * Add a string to this trie. | ||||
|  * | ||||
|  * @param trie | ||||
|  * @param key key to represent entry with | ||||
|  * @param entry entry to add | ||||
|  * @return 0 if added, 1 if already in trie, something else if other errors | ||||
|  */ | ||||
| TrieExitCode trie_add(Trie *trie, const char *key, Entry *entry); | ||||
| 
 | ||||
| TrieExitCode trie_add_len(Trie *trie, const char *key, size_t key_len, | ||||
|                           Entry *entry); | ||||
| 
 | ||||
| /**
 | ||||
|  * Add an entry by generating a random string as the key. | ||||
|  * | ||||
|  * @param trie | ||||
|  * @param key_ptr on success, set to the newly allocated generated key. This | ||||
|  * pointer is safe to use after unlocking the trie, and should be freed | ||||
|  * manually. | ||||
|  * @param entry entry to add | ||||
|  * @param secure whether to generate a longer, more secure random key | ||||
|  * @return Ok if the entry was added, an error code otherwise | ||||
|  */ | ||||
| TrieExitCode trie_add_random(Trie *trie, char **key_ptr, Entry *entry, | ||||
|                              bool secure); | ||||
| 
 | ||||
| /**
 | ||||
|  * Remove an entry from this trie given its key. | ||||
|  * | ||||
|  * @param trie | ||||
|  * @param key key representing entry | ||||
|  * @return true if the entry was present and has been removed, false if it was | ||||
|  * not present | ||||
|  */ | ||||
| bool trie_remove(Trie *trie, const char *key); | ||||
| 
 | ||||
| /**
 | ||||
|  * Returns the number of entries in this trie. | ||||
|  * | ||||
|  * @param trie | ||||
|  * @return the number of entries in this trie | ||||
|  */ | ||||
| size_t trie_size(Trie *trie); | ||||
| 
 | ||||
| /*
 | ||||
|  * Acquire a read lock on the trie. | ||||
|  * | ||||
|  * @return 0 if successful, non-zero otherwise (return value of | ||||
|  * pthread_rwlock_rdlock) | ||||
|  */ | ||||
| int trie_rlock(Trie *trie); | ||||
| 
 | ||||
| /*
 | ||||
|  * Acquire a write lock on the trie. | ||||
|  * | ||||
|  * @return 0 if successful, non-zero otherwise (return value of | ||||
|  * pthread_rwlock_wrlock) | ||||
|  */ | ||||
| int trie_wlock(Trie *trie); | ||||
| 
 | ||||
| /*
 | ||||
|  * Release the lock on a trie after having acquired it beforehand. | ||||
|  * | ||||
|  * @return 0 if successful, non-zero otherwise (return value of | ||||
|  * pthread_rwlock_unlock) | ||||
|  */ | ||||
| int trie_unlock(Trie *trie); | ||||
| 
 | ||||
| #endif // AD3_TERNARYTRIE
 | ||||
|  | @ -1,428 +0,0 @@ | |||
| #include <pthread.h> | ||||
| #include <stdint.h> | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
| 
 | ||||
| #include "trie.h" | ||||
| #include "trie_entry.h" | ||||
| #include "trie_node.h" | ||||
| 
 | ||||
| typedef struct ttrie { | ||||
|   TrieNode *root; | ||||
|   size_t size; | ||||
|   char *file_path; | ||||
|   pthread_rwlock_t lock; | ||||
| } Trie; | ||||
| 
 | ||||
| TrieExitCode trie_add_no_lock(Trie *trie, const char *key, Entry *entry); | ||||
| 
 | ||||
| /**
 | ||||
|  * Allocate and initialize an empty Trie | ||||
|  * | ||||
|  * @return pointer to the empty Trie | ||||
|  */ | ||||
| TrieExitCode trie_init(Trie **trie_ptr, const char *file_path) { | ||||
|   // Allocate & initialize trie
 | ||||
|   Trie *trie = calloc(1, sizeof(Trie)); | ||||
|   trie->root = tnode_init(); | ||||
|   pthread_rwlock_init(&trie->lock, NULL); | ||||
| 
 | ||||
|   if (file_path == NULL) { | ||||
|     trie->file_path = NULL; | ||||
|     *trie_ptr = trie; | ||||
|     return Ok; | ||||
|   } | ||||
| 
 | ||||
|   trie->file_path = strdup(file_path); | ||||
| 
 | ||||
|   // Populate trie with data from file
 | ||||
|   FILE *fp = fopen(file_path, "r"); | ||||
| 
 | ||||
|   if (fp == NULL) { | ||||
|     return FileError; | ||||
|   } | ||||
| 
 | ||||
|   // We read in lines of at most 8192 characters (sounds like enough)
 | ||||
|   char buffer[8192]; | ||||
|   EntryType type; | ||||
|   Entry *entry; | ||||
|   int i, j; | ||||
|   TrieExitCode status; | ||||
| 
 | ||||
|   while (fgets(buffer, 8192, fp)) { | ||||
|     i = 0; | ||||
| 
 | ||||
|     // Move index in buffer until we encounter first space character
 | ||||
|     while (buffer[i] != ' ') { | ||||
|       i++; | ||||
|     } | ||||
| 
 | ||||
|     // Split the buffer into two strings, the key and the payload
 | ||||
|     buffer[i] = '\0'; | ||||
| 
 | ||||
|     type = entry_type_from_char(buffer[i + 1]); | ||||
| 
 | ||||
|     // Skip type character & its surrounding spaces
 | ||||
|     j = i + 3; | ||||
| 
 | ||||
|     // Now remove the newline character
 | ||||
|     while (buffer[j] != '\n') { | ||||
|       j++; | ||||
|     } | ||||
| 
 | ||||
|     buffer[j] = '\0'; | ||||
| 
 | ||||
|     entry = entry_new(type, buffer + i + 3); | ||||
|     status = trie_add_no_lock(trie, buffer, entry); | ||||
| 
 | ||||
|     if (status != Ok) { | ||||
|       trie_free(trie); | ||||
|       return status; | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   fclose(fp); | ||||
| 
 | ||||
|   *trie_ptr = trie; | ||||
| 
 | ||||
|   return Ok; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * De-allocate a TernaryTree by freeing its entire underlying structure. | ||||
|  * | ||||
|  * @param trie trie to free | ||||
|  */ | ||||
| void trie_free(Trie *trie) { | ||||
|   tnode_free(trie->root); | ||||
|   free(trie); | ||||
| } | ||||
| 
 | ||||
| typedef struct searchresult { | ||||
|   TrieNode *parent; | ||||
|   TrieNode *child; | ||||
| } SearchResult; | ||||
| 
 | ||||
| SearchResult trie_search_node_len(Trie *trie, const char *key, size_t key_len) { | ||||
|   SearchResult out = {NULL, NULL}; | ||||
| 
 | ||||
|   size_t i = 0; | ||||
|   TrieNode **node_ptr = &(trie->root); | ||||
|   TrieNode **child_ptr; | ||||
| 
 | ||||
|   do { | ||||
|     child_ptr = tnode_search(*node_ptr, key[i], false); | ||||
| 
 | ||||
|     // We don't have to check whether *node_ptr is NULL, because if it was
 | ||||
|     // NULL, it wouldn't be in the binary tree.
 | ||||
|     if (child_ptr == NULL) { | ||||
|       return out; | ||||
|     } | ||||
| 
 | ||||
|     i++; | ||||
| 
 | ||||
|     if (memcmp((*child_ptr)->string, key + i, (*child_ptr)->string_len) != 0) { | ||||
|       return out; | ||||
|     } | ||||
| 
 | ||||
|     i += (*child_ptr)->string_len; | ||||
| 
 | ||||
|     if (i < key_len) { | ||||
|       node_ptr = child_ptr; | ||||
|     } | ||||
|   } while (i < key_len); | ||||
| 
 | ||||
|   // At this point, we've either arrived at an empty child, or traversed through
 | ||||
|   // the entire string. Therefore, all we have to do is check whether we're at
 | ||||
|   // the end of the string and if node represents a string.
 | ||||
|   if (i == key_len && (*child_ptr)->represents) { | ||||
|     out.parent = *node_ptr; | ||||
|     out.child = *child_ptr; | ||||
|   } | ||||
| 
 | ||||
|   return out; | ||||
| } | ||||
| 
 | ||||
| SearchResult trie_search_node(Trie *trie, const char *key) { | ||||
|   return trie_search_node_len(trie, key, strlen(key)); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Returns whether the given string is present in the trie. | ||||
|  * | ||||
|  * @param trie trie to look in | ||||
|  * @param string string to look up | ||||
|  * @return true if the string is present in the trie, false otherwise | ||||
|  */ | ||||
| TrieExitCode trie_search_len(Trie *trie, Entry **entry_ptr, const char *key, | ||||
|                              size_t key_len) { | ||||
|   SearchResult res = trie_search_node_len(trie, key, key_len); | ||||
| 
 | ||||
|   if (res.child == NULL) { | ||||
|     return NotFound; | ||||
|   } | ||||
| 
 | ||||
|   *entry_ptr = res.child->entry; | ||||
| 
 | ||||
|   return Ok; | ||||
| } | ||||
| 
 | ||||
| TrieExitCode trie_search(Trie *trie, Entry **entry_ptr, const char *key) { | ||||
|   return trie_search_len(trie, entry_ptr, key, strlen(key)); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Add the given string to the Trie. | ||||
|  * | ||||
|  * @param trie trie to add string to | ||||
|  * @param string string to add | ||||
|  * @return true if the string wasn't present in the trie and thus added, false | ||||
|  * otherwise | ||||
|  */ | ||||
| TrieExitCode trie_add_len_no_lock(Trie *trie, const char *key, size_t key_len, | ||||
|                                   Entry *entry) { | ||||
|   size_t i = 0; | ||||
|   uint8_t offset; | ||||
|   TrieNode **node_ptr = &(trie->root); | ||||
|   TrieNode **child_node_ptr; | ||||
|   TrieNode *child_node; | ||||
| 
 | ||||
|   do { | ||||
|     offset = 0; | ||||
|     child_node_ptr = tnode_search(*node_ptr, key[i], true); | ||||
| 
 | ||||
|     i++; | ||||
| 
 | ||||
|     // We've reached a NULL child, so we add the remaining part of the string
 | ||||
|     // here
 | ||||
|     if (*child_node_ptr == NULL) { | ||||
|       child_node = tnode_init(); | ||||
| 
 | ||||
|       while (offset < TRIE_MAX_SKIP_SIZE && i + offset < key_len) { | ||||
|         offset++; | ||||
|       } | ||||
| 
 | ||||
|       memcpy(child_node->string, key + i, offset); | ||||
| 
 | ||||
|       child_node->string_len = offset; | ||||
|       *child_node_ptr = child_node; | ||||
| 
 | ||||
|       // If the remaining part of the string is still longer than the maximum
 | ||||
|       // allowed skip length, we continue through the loop. The next iteration
 | ||||
|       // will enter this if statement again, and perform the same loop, until
 | ||||
|       // the string is fully added to the trie.
 | ||||
|       if (i + offset < key_len) { | ||||
|         node_ptr = child_node_ptr; | ||||
|         i += offset; | ||||
| 
 | ||||
|         continue; | ||||
|       } | ||||
| 
 | ||||
|       child_node->represents = true; | ||||
|       child_node->entry = entry; | ||||
| 
 | ||||
|       trie->size++; | ||||
|       return Ok; | ||||
|     } | ||||
| 
 | ||||
|     while (offset < (*child_node_ptr)->string_len) { | ||||
|       // String no longer aligns with edge, so we have to split
 | ||||
|       if (key[i + offset] != (*child_node_ptr)->string[offset]) { | ||||
|         TrieNode *split_node = tnode_init(); | ||||
|         child_node = *child_node_ptr; | ||||
| 
 | ||||
|         // New string of the split node is the prefix that we were able
 | ||||
|         // to skip
 | ||||
|         if (offset > 0) { | ||||
|           memcpy(split_node->string, child_node->string, offset); | ||||
|           split_node->string_len = offset; | ||||
|         } | ||||
| 
 | ||||
|         // split_node replaces child_node as the child of node
 | ||||
|         *child_node_ptr = split_node; | ||||
|         TrieNode **new_node_ptr = | ||||
|             tnode_search(split_node, child_node->string[offset], true); | ||||
|         *new_node_ptr = child_node; | ||||
| 
 | ||||
|         // child_node has now become a child of split_node, so we update its
 | ||||
|         // string accordingely by removing the skipped prefix + the one
 | ||||
|         // character that's already stored by being a child of split_node
 | ||||
|         /* char *old_string = child_node->string.ptr; */ | ||||
|         uint8_t new_skip_len = child_node->string_len - (offset + 1); | ||||
| 
 | ||||
|         if (new_skip_len > 0) { | ||||
|           char old_string[TRIE_MAX_SKIP_SIZE]; | ||||
|           memcpy(old_string, child_node->string + offset + 1, new_skip_len); | ||||
|           memcpy(child_node->string, old_string, new_skip_len); | ||||
|         } | ||||
| 
 | ||||
|         child_node->string_len = new_skip_len; | ||||
| 
 | ||||
|         // The while loop will exit either way after this has happened, as
 | ||||
|         // child_node is now split_node and split_node's len is already set to
 | ||||
|         // offset.
 | ||||
|         break; | ||||
|       } | ||||
| 
 | ||||
|       offset++; | ||||
|     } | ||||
| 
 | ||||
|     node_ptr = child_node_ptr; | ||||
| 
 | ||||
|     i += offset; | ||||
|   } while (i < key_len); | ||||
| 
 | ||||
|   if ((*child_node_ptr)->represents) { | ||||
|     return AlreadyPresent; | ||||
|   } | ||||
| 
 | ||||
|   (*child_node_ptr)->represents = true; | ||||
|   (*child_node_ptr)->entry = entry; | ||||
|   trie->size++; | ||||
|   return Ok; | ||||
| } | ||||
| 
 | ||||
| TrieExitCode trie_add_no_lock(Trie *trie, const char *key, Entry *entry) { | ||||
|   return trie_add_len_no_lock(trie, key, strlen(key), entry); | ||||
| } | ||||
| 
 | ||||
| TrieExitCode trie_add_len(Trie *trie, const char *key, size_t key_len, | ||||
|                           Entry *entry) { | ||||
|   if (trie->file_path != NULL) { | ||||
|     // Easiest way to make sure we don't add duplicate entries
 | ||||
|     // We use an internal function that doesn't require a read lock, as we're
 | ||||
|     // already inside a write lock
 | ||||
|     if (trie_search_node_len(trie, key, key_len).child != NULL) { | ||||
|       return AlreadyPresent; | ||||
|     } | ||||
| 
 | ||||
|     FILE *fp = fopen(trie->file_path, "a"); | ||||
| 
 | ||||
|     if (fp == NULL) { | ||||
|       return FileError; | ||||
|     } | ||||
| 
 | ||||
|     fputs(key, fp); | ||||
|     fputs(" ", fp); | ||||
|     fputc(entry_type_to_char(entry->type), fp); | ||||
|     fputs(" ", fp); | ||||
|     fputs(entry->string, fp); | ||||
|     fputs("\n", fp); | ||||
| 
 | ||||
|     fclose(fp); | ||||
|   } | ||||
| 
 | ||||
|   // This function *should* always return Ok. Otherwise, the function would've
 | ||||
|   // exited because the string was found in the trie.
 | ||||
|   return trie_add_len_no_lock(trie, key, key_len, entry); | ||||
| } | ||||
| 
 | ||||
| TrieExitCode trie_add(Trie *trie, const char *key, Entry *entry) { | ||||
|   return trie_add_len(trie, key, strlen(key), entry); | ||||
| } | ||||
| 
 | ||||
| TrieExitCode trie_add_random(Trie *trie, char **key_ptr, Entry *entry, | ||||
|                              bool secure) { | ||||
|   // Generate random key
 | ||||
|   bool ok = false; | ||||
|   int key_length = secure ? RANDOM_KEY_LENGTH_LONG : RANDOM_KEY_LENGTH_SHORT; | ||||
|   char *key = malloc(key_length + 1); | ||||
|   key[key_length] = '\0'; | ||||
| 
 | ||||
|   // We naively generate new keys until we find a key that isn't in the trie
 | ||||
|   // yet. With charset_len ** RANDOM_KEY_LENGTH sufficiently large, this isn't a
 | ||||
|   // problem, because the chances of collisions are extremely small.
 | ||||
|   while (!ok) { | ||||
|     for (int i = 0; i < key_length; i++) { | ||||
|       key[i] = charset[rand() % charset_len]; | ||||
|     } | ||||
| 
 | ||||
|     ok = trie_search_node(trie, key).child == NULL; | ||||
|   } | ||||
| 
 | ||||
|   TrieExitCode return_value = trie_add(trie, key, entry); | ||||
| 
 | ||||
|   if (return_value == Ok) { | ||||
|     *key_ptr = key; | ||||
|   } else { | ||||
|     free(key); | ||||
|   } | ||||
| 
 | ||||
|   return return_value; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Remove the given string from a Trie. | ||||
|  * | ||||
|  * @param trie trie to remove string from | ||||
|  * @param string string to remove | ||||
|  * @return true if the string was in the trie and thus removed, false otherwise | ||||
|  */ | ||||
| /* bool trie_remove(Trie *trie, const char *string) { */ | ||||
| /*   pthread_rwlock_wrlock(&trie->lock); */ | ||||
| 
 | ||||
| /*   bool return_value = false; */ | ||||
| 
 | ||||
| /*   SearchResult res = trie_search_node(trie, string); */ | ||||
| 
 | ||||
| /*   if (res.child == NULL) { */ | ||||
| /*     goto end; */ | ||||
| /*   } */ | ||||
| 
 | ||||
| /*   trie->size--; */ | ||||
| /*   return_value = true; */ | ||||
| 
 | ||||
| /*   if (res.parent != NULL) { */ | ||||
| /*     // We're removing a full leaf, so we calculate the offset of the
 | ||||
|  * character */ | ||||
| /*     // to remove from the parent */ | ||||
| /*     if (res.child->type == 2) { */ | ||||
| /*       size_t str_len = strlen(string); */ | ||||
| /*       size_t suffix_len = strlen(res.child->ptr.string); */ | ||||
| 
 | ||||
| /*       tnode_remove(res.parent, string[str_len - suffix_len - 1]); */ | ||||
| /*     } */ | ||||
| /*     // In the other case, the character to remove from the parent is the last
 | ||||
|  */ | ||||
| /*     // character of the string */ | ||||
| /*     else if (res.child->size == 0) { */ | ||||
| /*       size_t i = 0; */ | ||||
| 
 | ||||
| /*       while (string[i + 1] != DELIMITER) { */ | ||||
| /*         i++; */ | ||||
| /*       } */ | ||||
| 
 | ||||
| /*       tnode_remove(res.parent, string[i]); */ | ||||
| /*     } else { */ | ||||
| /*       res.child->type = 0; */ | ||||
| 
 | ||||
| /*       goto end; */ | ||||
| /*     } */ | ||||
| 
 | ||||
| /*     tnode_free(res.child); */ | ||||
| /*   } */ | ||||
| /*   // We're in the root here */ | ||||
| /*   else { */ | ||||
| /*     res.child->type = 0; */ | ||||
| /*   } */ | ||||
| 
 | ||||
| /* end: */ | ||||
| /*   pthread_rwlock_unlock(&trie->lock); */ | ||||
| 
 | ||||
| /*   return return_value; */ | ||||
| /* } */ | ||||
| 
 | ||||
| /**
 | ||||
|  * Return the current size of the given trie. | ||||
|  * | ||||
|  * @param trie trie to return size for | ||||
|  * @return size of the trie | ||||
|  */ | ||||
| size_t trie_size(Trie *trie) { return trie->size; } | ||||
| 
 | ||||
| int trie_rlock(Trie *trie) { return pthread_rwlock_rdlock(&trie->lock); } | ||||
| 
 | ||||
| int trie_wlock(Trie *trie) { return pthread_rwlock_wrlock(&trie->lock); } | ||||
| 
 | ||||
| int trie_unlock(Trie *trie) { return pthread_rwlock_unlock(&trie->lock); } | ||||
|  | @ -1,37 +0,0 @@ | |||
| #include "trie_entry.h" | ||||
| #include <stdlib.h> | ||||
| 
 | ||||
| EntryType entry_type_from_char(char c) { | ||||
|   switch (c) { | ||||
|   case '0': | ||||
|     return Redirect; | ||||
|   case '1': | ||||
|     return Paste; | ||||
|   default: | ||||
|     return Unknown; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| char entry_type_to_char(EntryType et) { | ||||
|   switch (et) { | ||||
|   case Redirect: | ||||
|     return '0'; | ||||
|   case Paste: | ||||
|     return '1'; | ||||
|   default: | ||||
|     return '\0'; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| Entry *entry_new(EntryType type, const char *string) { | ||||
|   Entry *entry = malloc(sizeof(Entry)); | ||||
|   entry->type = type; | ||||
| 
 | ||||
|   if (string != NULL) { | ||||
|     entry->string = strdup(string); | ||||
|   } else { | ||||
|     entry->string = NULL; | ||||
|   } | ||||
| 
 | ||||
|   return entry; | ||||
| } | ||||
|  | @ -1,7 +0,0 @@ | |||
| #include "trie.h" | ||||
| 
 | ||||
| EntryType entry_type_from_char(char c); | ||||
| 
 | ||||
| char entry_type_to_char(EntryType et); | ||||
| 
 | ||||
| Entry *entry_new(EntryType type, const char *string); | ||||
|  | @ -1,265 +0,0 @@ | |||
| #include <stdbool.h> | ||||
| #include <stdint.h> | ||||
| #include <stdlib.h> | ||||
| 
 | ||||
| #include "trie_node.h" | ||||
| 
 | ||||
| /**
 | ||||
|  * Allocate and initialize a new TrieInnerNode representing a given | ||||
|  * character. | ||||
|  * | ||||
|  * @param c character to represent | ||||
|  * @return pointer to newly allocated struct | ||||
|  */ | ||||
| TrieInnerNode *tinode_init(char c) { | ||||
|   TrieInnerNode *node = calloc(1, sizeof(TrieInnerNode)); | ||||
|   node->key = c; | ||||
| 
 | ||||
|   return node; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Allocate and initialize a new TrieNode. | ||||
|  * | ||||
|  * @return pointer to newly allocated struct | ||||
|  */ | ||||
| TrieNode *tnode_init() { | ||||
|   TrieNode *node = malloc(sizeof(TrieNode)); | ||||
| 
 | ||||
|   node->tree_size = 0; | ||||
|   node->string_len = 0; | ||||
|   node->represents = false; | ||||
| 
 | ||||
|   return node; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Free a TrieInnerNode and its underlying tree structure. This should | ||||
|  * usually only be called on the root of a binary tree to free the entire | ||||
|  * structure. | ||||
|  * | ||||
|  * @param node node whose tree to free | ||||
|  */ | ||||
| void tinode_free_cascade(TrieInnerNode *node) { | ||||
|   if (node->left != NULL) { | ||||
|     tinode_free_cascade(node->left); | ||||
|   } | ||||
| 
 | ||||
|   if (node->right != NULL) { | ||||
|     tinode_free_cascade(node->right); | ||||
|   } | ||||
| 
 | ||||
|   if (node->next != NULL) { | ||||
|     tnode_free(node->next); | ||||
|   } | ||||
| 
 | ||||
|   free(node); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Free a TrieNode and its underlying tree structure. | ||||
|  * | ||||
|  * @param node node to free | ||||
|  */ | ||||
| void tnode_free(TrieNode *node) { | ||||
|   if (node->tree_size > 0) { | ||||
|     tinode_free_cascade(node->tree); | ||||
|   } | ||||
| 
 | ||||
|   // TODO properly free entry
 | ||||
|   /* if (node->payload != NULL) { */ | ||||
|   /*     free(node->payload); */ | ||||
|   /* } */ | ||||
| 
 | ||||
|   free(node); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * This function performs a lookup in the underlying binary tree of the given | ||||
|  * TrieNode. If found, the return value is a pointer to the memory | ||||
|  * location where the TrieInnerNode representing the given character | ||||
|  * stores its `next` field. If not found, the return value is NULL, unless | ||||
|  * `create` is true. | ||||
|  * | ||||
|  * NOTE: a non-NULL return value does not mean that the dereferenced value is | ||||
|  * also not NULL. In particular, if `create` is set to true and the function had | ||||
|  * to create the new node, the dereferenced value will always be NULL. | ||||
|  * | ||||
|  * @param node node to perform lookup in. If node is a full leaf, the return | ||||
|  * value will always be NULL, regardless of the value of create. | ||||
|  * @param create whether to create the TrieInnerNode if it isn't present | ||||
|  * yet. If this is set to true, the function will never return NULL unless the | ||||
|  * node represents a leaf with a string, because the struct and therefore the | ||||
|  * address is created if it doesn't exist yet. | ||||
|  */ | ||||
| TrieNode **tnode_search(TrieNode *node, const char c, bool create) { | ||||
|   // It can happen that the node has no initialized root yet
 | ||||
|   if (node->tree_size == 0) { | ||||
|     if (create) { | ||||
|       node->tree_size++; | ||||
|       node->tree = tinode_init(c); | ||||
| 
 | ||||
|       return &node->tree->next; | ||||
|     } | ||||
| 
 | ||||
|     return NULL; | ||||
|   } | ||||
| 
 | ||||
|   TrieInnerNode *parent = node->tree; | ||||
|   TrieInnerNode *child; | ||||
| 
 | ||||
|   // Iterate through the tree until we either find the character or realize it's
 | ||||
|   // not present in the tree
 | ||||
|   // FIXME don't use while (1)
 | ||||
|   while (1) { | ||||
|     if (parent->key == c) { | ||||
|       return &parent->next; | ||||
|     } else if (c < parent->key) { | ||||
|       child = parent->left; | ||||
|     } else { | ||||
|       child = parent->right; | ||||
|     } | ||||
| 
 | ||||
|     if (child == NULL) { | ||||
|       break; | ||||
|     } | ||||
| 
 | ||||
|     parent = child; | ||||
|   }; | ||||
| 
 | ||||
|   // child is NULL, meaning the character isn't in the binary tree yet.
 | ||||
| 
 | ||||
|   // If create is true, we create the new node so that we can still return a
 | ||||
|   // non-NULL pointer.
 | ||||
|   if (create) { | ||||
|     TrieInnerNode *new_node = tinode_init(c); | ||||
| 
 | ||||
|     if (c < parent->key) { | ||||
|       parent->left = new_node; | ||||
|     } else { | ||||
|       parent->right = new_node; | ||||
|     } | ||||
| 
 | ||||
|     node->tree_size++; | ||||
| 
 | ||||
|     return &new_node->next; | ||||
|   } | ||||
| 
 | ||||
|   return NULL; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Split a remaining string leaf node in two. This function assumes it receives | ||||
|  * a full leaf as its input. | ||||
|  * | ||||
|  * @param node node to split | ||||
|  */ | ||||
| /* void tnode_split(TrieNode *node) { */ | ||||
| /*   TrieNode *new_node = tnode_init(); */ | ||||
| /*   char key = node->ptr.string[0]; */ | ||||
| 
 | ||||
| /*   // There's a chance the remaining string was only 1 character, meaning the
 | ||||
|  * new */ | ||||
| /*   // node doesn't have to store a string */ | ||||
| /*   if (node->ptr.string[1] != DELIMITER) { */ | ||||
| /*     tnode_set_string(new_node, node->ptr.string + 1); */ | ||||
| /*   } else { */ | ||||
| /*     new_node->type = 1; */ | ||||
| /*   } */ | ||||
| 
 | ||||
| /*   new_node->entry = node->entry; */ | ||||
| 
 | ||||
| /*   node->type = 0; */ | ||||
| /*   node->size = 0; */ | ||||
| /*   node->entry = NULL; */ | ||||
| 
 | ||||
| /*   free(node->ptr.string); */ | ||||
| /*   node->ptr.string = NULL; */ | ||||
| 
 | ||||
| /*   // Initialize node's binary tree with the correct character */ | ||||
| /*   TrieNode **node_ptr = tnode_search(node, key, true); */ | ||||
| /*   *node_ptr = new_node; */ | ||||
| /* } */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Remove the given character from a TrieInnerNode's subtree. The | ||||
|  * function assumes the character is indeed in the subtree. | ||||
|  */ | ||||
| void tinode_remove(TrieInnerNode *node, const char c) { | ||||
|   TrieInnerNode **to_remove_ptr = &node; | ||||
| 
 | ||||
|   // We use pointers to pointers here so we can later free the removed node
 | ||||
|   // without having to know what its parent is
 | ||||
|   while ((*to_remove_ptr)->key != c) { | ||||
|     to_remove_ptr = (c < (*to_remove_ptr)->key) ? &(*to_remove_ptr)->left | ||||
|                                                 : &(*to_remove_ptr)->right; | ||||
|   }; | ||||
| 
 | ||||
|   // If the node isn't a leaf, we have to replace it with another
 | ||||
|   if ((*to_remove_ptr)->left != NULL || (*to_remove_ptr)->right != NULL) { | ||||
|     TrieInnerNode *to_replace = *to_remove_ptr; | ||||
| 
 | ||||
|     // Replace with its only right child
 | ||||
|     if (to_replace->left == NULL) { | ||||
|       TrieInnerNode *to_remove = to_replace->right; | ||||
| 
 | ||||
|       to_replace->key = to_remove->key; | ||||
|       to_replace->next = to_remove->next; | ||||
|       to_replace->left = to_remove->left; | ||||
|       to_replace->right = to_remove->right; | ||||
| 
 | ||||
|       free(to_remove); | ||||
|     } | ||||
|     // Replace with its only left child
 | ||||
|     else if (to_replace->right == NULL) { | ||||
|       TrieInnerNode *to_remove = to_replace->left; | ||||
| 
 | ||||
|       to_replace->key = to_remove->key; | ||||
|       to_replace->next = to_remove->next; | ||||
|       to_replace->left = to_remove->left; | ||||
|       to_replace->right = to_remove->right; | ||||
| 
 | ||||
|       free(to_remove); | ||||
|     } | ||||
|     // Node has two children, so replace with successor
 | ||||
|     else { | ||||
|       TrieInnerNode *to_remove_parent = to_replace; | ||||
|       TrieInnerNode *to_remove = to_replace->right; | ||||
| 
 | ||||
|       while (to_remove->left != NULL) { | ||||
|         to_remove_parent = to_remove; | ||||
|         to_remove = to_remove->left; | ||||
|       } | ||||
| 
 | ||||
|       to_replace->key = to_remove->key; | ||||
|       to_replace->next = to_remove->next; | ||||
| 
 | ||||
|       if (to_remove_parent != to_replace) { | ||||
|         to_remove_parent->left = to_remove->right; | ||||
|       } else { | ||||
|         to_remove_parent->right = to_remove->right; | ||||
|       } | ||||
| 
 | ||||
|       free(to_remove); | ||||
|     } | ||||
|   } | ||||
|   // We're the leaf, so we free ourselves
 | ||||
|   else { | ||||
|     free(*to_remove_ptr); | ||||
|     *to_remove_ptr = NULL; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Remove the given character from a TrieNode, respecting the rules | ||||
|  * of a binary search tree. This function assumes the character is in the search | ||||
|  * tree. | ||||
|  * | ||||
|  * @param node node to remove character from | ||||
|  * @param c character to remove | ||||
|  */ | ||||
| void tnode_remove(TrieNode *node, const char c) { | ||||
|   tinode_remove(node->tree, c); | ||||
| 
 | ||||
|   node->tree_size--; | ||||
| } | ||||
|  | @ -1,53 +0,0 @@ | |||
| #include <stdint.h> | ||||
| 
 | ||||
| #include "trie.h" | ||||
| 
 | ||||
| /**
 | ||||
|  * Represents a node of the binary search tree contained within each non-leaf | ||||
|  * TrieNode. The tree is ordered by key. | ||||
|  */ | ||||
| typedef struct tinode { | ||||
|   struct tinode *left; /* subtree searched when the wanted character is smaller than key */ | ||||
|   struct tinode *right; /* subtree searched when the wanted character is larger than key */ | ||||
|   struct tnode *next; /* trie node reached by following key */ | ||||
|   char key; /* character this tree node is looked up by */ | ||||
| } TrieInnerNode; | ||||
| 
 | ||||
| /**
 | ||||
|  * Represents a node inside a Trie. A node can be in one of three states: | ||||
|  * - Internal node: a node that's part of a path to a leaf node. This node will | ||||
|  *   always have a size greater than one, and an initialized root. | ||||
|  * - Leaf: a node solely used to represent a string ending there. Its size is 0, | ||||
|  *   its ptr is uninitialized and represents is true. | ||||
|  * - Full leaf: a leaf node that contains a string. This occurs when a string is | ||||
|  *   added whose path is not fully in the tree yet, causing its remaining suffix | ||||
|  *   to be stored as a single node. Its size will be zero, represents is true, | ||||
|  *   and its string pointer is initialized. | ||||
|  * | ||||
|  * NOTE(review): "size", "ptr" and "root" are not field names of this struct; | ||||
|  * they presumably refer to tree_size, string and tree. Confirm and update. | ||||
|  */ | ||||
| typedef struct tnode { | ||||
|   Entry *entry; /* presumably the value stored for the string ending here; TODO confirm */ | ||||
| 
 | ||||
|   TrieInnerNode *tree; /* root of the binary search tree of child characters */ | ||||
|   uint8_t tree_size; /* number of nodes currently in tree */ | ||||
| 
 | ||||
|   // Skips are at most TRIE_MAX_SKIP_SIZE characters, and are stored in the
 | ||||
|   // nodes
 | ||||
|   char string[TRIE_MAX_SKIP_SIZE]; | ||||
|   uint8_t string_len; /* presumably the number of characters of string in use; TODO confirm */ | ||||
| 
 | ||||
|   bool represents; /* per the description above, true when a string ends at this node */ | ||||
| } TrieNode; | ||||
| 
 | ||||
| TrieInnerNode *tinode_init(char c); | ||||
| 
 | ||||
| TrieNode *tnode_init(); | ||||
| 
 | ||||
| void tinode_free_cascade(TrieInnerNode *node); | ||||
| 
 | ||||
| void tnode_free(TrieNode *node); | ||||
| 
 | ||||
| TrieNode **tnode_search(TrieNode *node, const char c, bool create); | ||||
| 
 | ||||
| void tinode_remove(TrieInnerNode *node, const char c); | ||||
| 
 | ||||
| void tnode_remove(TrieNode *node, const char c); | ||||
							
								
								
									
										1839
									
								
								trie/test/test.h
								
								
								
								
							
							
						
						
									
										1839
									
								
								trie/test/test.h
								
								
								
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							|  | @ -1,205 +0,0 @@ | |||
| #ifndef AD3_FUZZYTEST | ||||
| #define AD3_FUZZYTEST | ||||
| 
 | ||||
| #include <stdlib.h> | ||||
| #include <stdbool.h> | ||||
| #include <string.h> | ||||
| #include <stdio.h> | ||||
| #include "trie.h" | ||||
| 
 | ||||
/* Parameters describing a single fuzz-test round. */
| typedef struct fuzzyconfig { | ||||
|     int seed; /* value handed to srand() before the strings are generated */ | ||||
|     int word_length; /* size in bytes of each string buffer, terminator included */ | ||||
|     int word_count; /* number of strings generated and added to the trie */ | ||||
| } FuzzyConfig; | ||||
| 
 | ||||
/**
 * Fill a buffer with a random string consisting only of letters, digits, ','
 * and '?'. The string's length is chosen randomly and is at most len - 2, so
 * the result always fits the buffer together with its terminating null byte.
 *
 * A buffer of length 1 receives the empty string; a non-positive length leaves
 * the buffer untouched.
 *
 * @param s buffer to write the string to
 * @param len size of the buffer in bytes
 */
void random_clean_string(char* s, int len) {
    static const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,?";
    const int charset_len = (int)(sizeof charset - 1);

    // Without any room we can't even store the terminator
    if (len <= 0) {
        return;
    }

    // Only the terminator fits; this also avoids a modulo by zero below
    if (len == 1) {
        s[0] = '\0';
        return;
    }

    // len - 1 ensures that we can still set the null byte for the final byte
    const int actual_len = rand() % (len - 1);

    for (int i = 0; i < actual_len; i++) {
        s[i] = charset[rand() % charset_len];
    }

    s[actual_len] = '\0';
}
| 
 | ||||
/**
 * Fill a buffer with random bytes and terminate it with a null byte.
 *
 * The first byte is never zero and the last byte of the buffer is always zero.
 * Bytes in between may be zero as well, so the resulting C string can be
 * shorter than len - 1 characters. A buffer of length 1 can only hold the
 * terminator and therefore ends up as the empty string; a non-positive length
 * leaves the buffer untouched.
 *
 * @param s buffer to write the string to
 * @param len size of the buffer in bytes
 */
void random_string(char* s, int len) {
    // Nothing can be written without overrunning the buffer
    if (len <= 0) {
        return;
    }

    // String can't be an empty string as they aren't supported
    s[0] = (char)(rand() % 255 + 1);

    for (int i = 1; i < len - 1; i++) {
        s[i] = (char)(rand() % 256);
    }

    // Just in case no null characters were created
    s[len - 1] = '\0';
}
| 
 | ||||
/**
 * Fill every row of a string matrix with a random string.
 *
 * @param s array of count buffers
 * @param count number of buffers in s
 * @param len size in bytes of each buffer
 */
void random_string_matrix(char** s, int count, int len) {
    int row = 0;

    while (row < count) {
        random_string(s[row], len);
        row++;
    }
}
| 
 | ||||
/**
 * Allocate count zero-initialized buffers of len bytes each.
 *
 * The caller owns the result and has to free every row as well as the array
 * of rows itself.
 *
 * @param count number of buffers to allocate
 * @param len size in bytes of each buffer
 * @return array of count buffers, or NULL if count or len isn't positive or if
 * an allocation failed (nothing is leaked in that case)
 */
char** init_string_matrix(int count, int len) {
    if (count <= 0 || len <= 0) {
        return NULL;
    }

    char **matrix = calloc((size_t)count, sizeof *matrix);

    if (matrix == NULL) {
        return NULL;
    }

    for (int i = 0; i < count; i++) {
        matrix[i] = calloc((size_t)len, sizeof **matrix);

        // Roll back the rows allocated so far so nothing leaks
        if (matrix[i] == NULL) {
            while (i-- > 0) {
                free(matrix[i]);
            }

            free(matrix);

            return NULL;
        }
    }

    return matrix;
}
| 
 | ||||
| /**
 | ||||
|  * Test a given trie implementation using randomly generated strings generated | ||||
|  * using a given seed. | ||||
|  * | ||||
|  * @param seed seed to use for generating random strings | ||||
|  * @param count how many strings to test with | ||||
|  * @param len maximum length of each string | ||||
|  * @param init_func function to creat a new trie of the wanted type | ||||
|  * @param free_func function to free the given trie | ||||
|  * @param add_func function to add a string to the given trie | ||||
|  * @param remove_func function to remove a string from the given trie | ||||
|  * @param size_func function to get the size of the given trie | ||||
|  * @return exit code describing failures, if any | ||||
|  */ | ||||
| int fuzzy_test_trie_seed(FuzzyConfig conf) { | ||||
|     srand(conf.seed); | ||||
| 
 | ||||
|     char** matrix = init_string_matrix(conf.word_count, conf.word_length); | ||||
|     random_string_matrix(matrix, conf.word_count, conf.word_length); | ||||
|     bool* contains = calloc(conf.word_count, sizeof(bool)); | ||||
| 
 | ||||
|     // It's possible that the string matrix contains duplicate strings
 | ||||
|     bool** contains_dedupped = calloc(conf.word_count, sizeof(bool*)); | ||||
| 
 | ||||
|     for (int i = 0; i < conf.word_count; i++) { | ||||
|         if (contains_dedupped[i] == NULL) { | ||||
|             contains_dedupped[i] = contains + i; | ||||
| 
 | ||||
|             for (int j = i + 1; j < conf.word_count; j++) { | ||||
|                 if (strcmp(matrix[i], matrix[j]) == 0) { | ||||
|                     contains_dedupped[j] = contains + i; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     // We keep track of the size as well so that we can check whether this is
 | ||||
|     // also correct
 | ||||
|     size_t size = 0; | ||||
| 
 | ||||
|     Trie *ct; | ||||
|     trie_init(&ct, NULL); | ||||
| 
 | ||||
|     bool changed; | ||||
|     TrieExitCode status; | ||||
| 
 | ||||
|     // 0: success
 | ||||
|     // 1: invalid add
 | ||||
|     // 2: invalid remove
 | ||||
|     // 3: bad size after adds
 | ||||
|     // 4: bad size after removes
 | ||||
|     int exit_code = 0; | ||||
| 
 | ||||
|     // Add all strings to trie, checking for duplicates
 | ||||
|     for (int i = 0; i < conf.word_count; i++) { | ||||
|       status = trie_add(ct, matrix[i], NULL); | ||||
| 
 | ||||
|       // if changed is false, *contains_dedupped[i] should be true, as changed
 | ||||
|       // can only be false if the string is already contained in the trie. if
 | ||||
|       // changed is true, *contains_dedupped[i] should be false, as the string
 | ||||
|       // cannot be in the trie yet.
 | ||||
|       if (status == Ok && *contains_dedupped[i]) { | ||||
|         exit_code = 1; | ||||
|         goto END; | ||||
|       } | ||||
| 
 | ||||
|       if (!*contains_dedupped[i]) { | ||||
|         *contains_dedupped[i] = true; | ||||
|         size++; | ||||
|       } | ||||
|     } | ||||
| 
 | ||||
|     // Ensure size is correct
 | ||||
|     if (trie_size(ct) != size) { | ||||
|       printf("%i %i\n", trie_size(ct), size); | ||||
|       exit_code = 3; | ||||
|       goto END; | ||||
|     } | ||||
| 
 | ||||
|     // Remove all strings again, again taking duplicates into consideration
 | ||||
|     /* for (int i = 0; i < conf.word_count; i++) { */ | ||||
|     /*   changed = remove_func(ct, matrix[i]); */ | ||||
| 
 | ||||
|     /*   // The string shouldn't be in the trie, yet another add operation */ | ||||
|     /*   // says it added it as well */ | ||||
|     /*   if (changed != *contains_dedupped[i]) { */ | ||||
|     /*     exit_code = 2; */ | ||||
|     /*     goto END; */ | ||||
|     /*   } */ | ||||
| 
 | ||||
|     /*   if (*contains_dedupped[i]) { */ | ||||
|     /*     *contains_dedupped[i] = false; */ | ||||
|     /*     size--; */ | ||||
|     /*   } */ | ||||
|     /* } */ | ||||
| 
 | ||||
|     // Finally, check that the trie is completely empty
 | ||||
|     /* if (size_func(ct) != 0) { */ | ||||
|     /*   exit_code = 4; */ | ||||
|     /* } */ | ||||
| 
 | ||||
| END: | ||||
|     trie_free(ct); | ||||
| 
 | ||||
|     // Even testing functions should properly free memory
 | ||||
|     free(contains); | ||||
|     free(contains_dedupped); | ||||
| 
 | ||||
|     for (int i = 0; i < conf.word_count; i++) { | ||||
|         free(matrix[i]); | ||||
|     } | ||||
| 
 | ||||
|     free(matrix); | ||||
| 
 | ||||
|     return exit_code; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Same as fuzzy_test_trie_seed, except that the seed is randomly generated. | ||||
|  * | ||||
|  * @param count how many strings to test with | ||||
|  * @param len maximum length of each string | ||||
|  * @param init_func function to creat a new trie of the wanted type | ||||
|  * @param free_func function to free the given trie | ||||
|  * @param add_func function to add a string to the given trie | ||||
|  * @param remove_func function to remove a string from the given trie | ||||
|  * @param size_func function to get the size of the given trie | ||||
|  * @return the generated seed if the test wasn't successful, -1 otherwise. | ||||
|  */ | ||||
| /* int fuzzy_test_trie(int count, int len, void* (*init_func) (), void (*free_func) (void*), bool (*add_func) (void*, char*), bool (*remove_func) (void*, char*), int (*size_func) (void*)) { */ | ||||
| /*     int seed = rand(); */ | ||||
| /*     bool succeeded = fuzzy_test_trie_seed(seed, count, len, init_func, free_func, add_func, remove_func, size_func); */ | ||||
| 
 | ||||
| /*     if (!succeeded) { */ | ||||
| /*         return seed; */ | ||||
| /*     } */ | ||||
| 
 | ||||
| /*     return -1; */ | ||||
| /* } */ | ||||
| 
 | ||||
| 
 | ||||
| #endif | ||||
|  | @ -1,189 +0,0 @@ | |||
| #include "test.h" | ||||
| #include "trie.h" | ||||
| #include "fuzzy.h" | ||||
| 
 | ||||
/* Check that trie_size(ct) equals size; the TEST_MSG line attaches the actual */
/* size to that check. Expands to two statements and evaluates ct twice. */
| #define TEST_SIZE(ct, size) \ | ||||
|     TEST_CHECK(trie_size(ct) == size); \ | ||||
|     TEST_MSG("Size: %zu", trie_size(ct)) | ||||
| 
 | ||||
/* Declare a local `Trie *ct`, initialize it through trie_init and check that */
/* it is not NULL. Introduces a variable, so use it once per scope. */
| # define TRIE_INIT() \ | ||||
|   Trie *ct; \ | ||||
|   trie_init(&ct, NULL); \ | ||||
|   TEST_CHECK(ct != NULL) | ||||
| 
 | ||||
| void test_init() { | ||||
|   TRIE_INIT(); | ||||
|   TEST_SIZE(ct, 0); | ||||
|   trie_free(ct); | ||||
| } | ||||
| 
 | ||||
| void test_add_one() { | ||||
|   TRIE_INIT(); | ||||
| 
 | ||||
|   Entry *entry = entry_new(Redirect, ""); | ||||
|   const char* string = "this is a test"; | ||||
| 
 | ||||
|   TEST_CHECK(trie_add(ct, string, entry) == Ok); | ||||
|   Entry *entry2; | ||||
|   TEST_CHECK(trie_search(ct, &entry2, string) == Ok); | ||||
|   TEST_CHECK(entry == entry2); | ||||
|   TEST_SIZE(ct, 1); | ||||
|   trie_free(ct); | ||||
| } | ||||
| 
 | ||||
| void test_add_prefix() { | ||||
|   TRIE_INIT(); | ||||
| 
 | ||||
|   const char *s1 = "halloween-2022"; | ||||
|   const char *s2 = "halloween-202"; | ||||
| 
 | ||||
|   Entry *entry1 = entry_new(Redirect, ""); | ||||
|   Entry *entry2 = entry_new(Redirect, ""); | ||||
| 
 | ||||
|   TEST_CHECK(trie_add(ct, s1, entry1) == Ok); | ||||
|   TEST_CHECK(trie_add(ct, s2, entry2) == Ok); | ||||
| 
 | ||||
|   Entry *entry3; | ||||
| 
 | ||||
|   TEST_CHECK(trie_search(ct, &entry3, s1) == Ok); | ||||
|   TEST_CHECK(entry3 == entry1); | ||||
|   entry2 = NULL; | ||||
| 
 | ||||
|   TEST_CHECK(trie_search(ct, &entry3, s2) == Ok); | ||||
|   TEST_CHECK(entry3 == entry2); | ||||
| 
 | ||||
|   trie_free(ct); | ||||
| } | ||||
| 
 | ||||
| void test_search_not_present() { | ||||
|   TRIE_INIT(); | ||||
| 
 | ||||
|   TEST_CHECK(trie_add(ct, "this string exists", NULL) == Ok); | ||||
|   Entry *entry; | ||||
|   TEST_CHECK(trie_search(ct, &entry, "this string does not exist") == NotFound); | ||||
| 
 | ||||
|   trie_free(ct); | ||||
| } | ||||
| 
 | ||||
| void test_add_more() { | ||||
|   TRIE_INIT(); | ||||
| 
 | ||||
|   const char* one = "one"; | ||||
|   const char* two = "two"; | ||||
|   const char* twenty = "twenty"; | ||||
|   const char* twentytwo = "twentytwo"; | ||||
| 
 | ||||
|   Entry *entry = entry_new(Redirect, ""); | ||||
| 
 | ||||
|   TEST_CHECK(trie_add(ct, one, entry) == Ok); | ||||
|   TEST_CHECK(trie_add(ct, two, entry) == Ok); | ||||
|   TEST_CHECK(trie_add(ct, twenty, entry) == Ok); | ||||
|   TEST_CHECK(trie_add(ct, twentytwo, entry) == Ok); | ||||
| 
 | ||||
|   TEST_SIZE(ct, 4); | ||||
| 
 | ||||
|   Entry *entry2; | ||||
|   TEST_CHECK(trie_search(ct, &entry2, one) == Ok); | ||||
|   TEST_CHECK(entry2 == entry); | ||||
|   entry2 = NULL; | ||||
| 
 | ||||
|   TEST_CHECK(trie_search(ct, &entry2, two) == Ok); | ||||
|   TEST_CHECK(entry2 == entry); | ||||
|   entry2 = NULL; | ||||
| 
 | ||||
|   TEST_CHECK(trie_search(ct, &entry2, twenty) == Ok); | ||||
|   TEST_CHECK(entry2 == entry); | ||||
|   entry2 = NULL; | ||||
| 
 | ||||
|   TEST_CHECK(trie_search(ct, &entry2, twentytwo) == Ok); | ||||
|   TEST_CHECK(entry2 == entry); | ||||
|   entry2 = NULL; | ||||
| 
 | ||||
|   TEST_CHECK(trie_add(ct, one, NULL) == AlreadyPresent); | ||||
|   TEST_CHECK(trie_add(ct, two, NULL) == AlreadyPresent); | ||||
|   TEST_CHECK(trie_add(ct, twenty, NULL) == AlreadyPresent); | ||||
|   TEST_CHECK(trie_add(ct, twentytwo, NULL) == AlreadyPresent); | ||||
| 
 | ||||
|   trie_free(ct); | ||||
| } | ||||
| 
 | ||||
| /* void test_remove_one() { */ | ||||
| /*     Trie* ct = trie_init(); */ | ||||
| /*     TEST_CHECK(ct != NULL); */ | ||||
| 
 | ||||
| /*     const char* string = "this is a test"; */ | ||||
| /*     TEST_CHECK(trie_add(ct, string, NULL)); */ | ||||
| /*     TEST_SIZE(ct, 1); */ | ||||
| 
 | ||||
| /*     TEST_CHECK(trie_remove(ct, string)); */ | ||||
| /*     TEST_SIZE(ct, 0); */ | ||||
| 
 | ||||
| /*     trie_free(ct); */ | ||||
| /* } */ | ||||
| 
 | ||||
| /* void test_remove_more() { */ | ||||
| /*     Trie* ct = trie_init(); */ | ||||
| /*     TEST_CHECK(ct != NULL); */ | ||||
| 
 | ||||
| /*     const char* one = "one"; */ | ||||
| /*     const char* two = "two"; */ | ||||
| /*     const char* twenty = "twenty"; */ | ||||
| /*     const char* twentytwo = "twentytwo"; */ | ||||
| /*     TEST_CHECK(trie_add(ct, one, NULL)); */ | ||||
| /*     TEST_CHECK(trie_add(ct, two, NULL)); */ | ||||
| /*     TEST_CHECK(trie_add(ct, twenty, NULL)); */ | ||||
| /*     TEST_CHECK(trie_add(ct, twentytwo, NULL)); */ | ||||
| 
 | ||||
| /*     TEST_SIZE(ct, 4); */ | ||||
| 
 | ||||
| /*     TEST_CHECK(trie_remove(ct, one)); */ | ||||
| /*     TEST_CHECK(trie_remove(ct, two)); */ | ||||
| /*     TEST_CHECK(trie_remove(ct, twenty)); */ | ||||
| /*     TEST_CHECK(trie_remove(ct, twentytwo)); */ | ||||
| 
 | ||||
| /*     TEST_SIZE(ct, 0); */ | ||||
| 
 | ||||
| /*     trie_free(ct); */ | ||||
| /* } */ | ||||
| 
 | ||||
| /* void test_remove_not_present() { */ | ||||
| /*     Trie* ct = trie_init(); */ | ||||
| /*     TEST_CHECK(ct != NULL); */ | ||||
| 
 | ||||
| /*     TEST_CHECK(trie_add(ct, "this string exists", NULL)); */ | ||||
| /*     TEST_CHECK(!trie_remove(ct, "this string does not exist")); */ | ||||
| 
 | ||||
| /*     trie_free(ct); */ | ||||
| /* } */ | ||||
| 
 | ||||
| // Test seeds that are known to fail so we don't get regressions
 | ||||
| void test_fuzzy_set() { | ||||
|   FuzzyConfig configs[] = { | ||||
|     { 403318210, 5, 500}, | ||||
|     { 588218406, 16, 460}, | ||||
|     { 297512224, 21, 500}, | ||||
|     { 403318210, 5, 500} | ||||
|   }; | ||||
| 
 | ||||
|   int count = sizeof(configs) / sizeof(FuzzyConfig); | ||||
|   int res; | ||||
| 
 | ||||
|   for (int i = 0; i < count; i++) { | ||||
| res = fuzzy_test_trie_seed(configs[i]); | ||||
|     TEST_CHECK_(res == 0, | ||||
|                 "Failed config, seed = %i, len = %i, count = %i, code=%i", configs[i].seed, configs[i].word_length, configs[i].word_count, res); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
/* Test cases of this file, as name/function pairs. The removal tests are */
/* disabled together with their commented-out definitions above. */
/* NOTE(review): test_add_prefix is defined above but not listed here. */
/* NOTE(review): the { NULL, NULL } row presumably terminates the list - confirm in test.h. */
| TEST_LIST = { | ||||
|         {"trie init",test_init }, | ||||
|         { "trie add one",test_add_one }, | ||||
|         { "trie add more",test_add_more }, | ||||
|         { "trie search not present",test_search_not_present}, | ||||
| 
 | ||||
|         /* { "trie remove one",test_remove_one }, */ | ||||
|         /* { "trie remove more",test_remove_more }, */ | ||||
|         /* { "trie remove not present",test_remove_not_present}, */ | ||||
|         { "trie fuzzy set", test_fuzzy_set }, | ||||
|         { NULL, NULL} | ||||
| }; | ||||
|  | @ -1,34 +0,0 @@ | |||
| #include "test.h" | ||||
| #include "trie.h" | ||||
| #include "fuzzy.h" | ||||
| 
 | ||||
| void test_fuzzy() { | ||||
|     // Randomize seed
 | ||||
|     srand(time(NULL)); | ||||
| 
 | ||||
|     FuzzyConfig config; | ||||
|     int counter = 0; | ||||
|     int res; | ||||
| 
 | ||||
|     for (int len = 1; len < 25; len += 5) { | ||||
|       for (int count = 10; count <= 500; count += 10) { | ||||
|         for (int i = 0; i < 50; i++) { | ||||
|           counter++; | ||||
| 
 | ||||
|           config.seed = rand(); | ||||
|           config.word_length = len; | ||||
|           config.word_count = count; | ||||
| 
 | ||||
| res = fuzzy_test_trie_seed(config); | ||||
|     TEST_CHECK_(res == 0, | ||||
|                 "Failed config, seed = %i, len = %i, count = %i, code = %i", config.seed, config.word_length, config.word_count, res); | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     TEST_MSG("fuzzy tests done = %i", counter); | ||||
| } | ||||
| 
 | ||||
/* Test cases of this file, as name/function pairs: only the randomized fuzz test. */
/* NOTE(review): the { NULL, NULL } row presumably terminates the list - confirm in test.h. */
| TEST_LIST = { | ||||
|   { "customtrie fuzzy", test_fuzzy }, | ||||
|   { NULL, NULL} | ||||
| }; | ||||
		Loading…
	
		Reference in New Issue