Compare commits
No commits in common. "4427016094ba5cd76c1fe978fdeb989638a19256" and "a2d4b970e7490ad364a7fe595beb6556457fda29" have entirely different histories.
4427016094
...
a2d4b970e7
|
|
@ -38,13 +38,9 @@ pipeline:
|
||||||
secrets:
|
secrets:
|
||||||
- minio_access_key
|
- minio_access_key
|
||||||
- minio_secret_key
|
- minio_secret_key
|
||||||
when:
|
|
||||||
branch:
|
|
||||||
exclude: [ release/* ]
|
|
||||||
event: push
|
|
||||||
|
|
||||||
publish-rel:
|
publish-rel:
|
||||||
image: 'curlimages/curl'
|
image: 'alpine:3.18.0'
|
||||||
group: publish
|
group: publish
|
||||||
commands:
|
commands:
|
||||||
- >
|
- >
|
||||||
|
|
@ -57,7 +53,5 @@ pipeline:
|
||||||
--user "Chewing_Bever:$GITEA_PASSWORD"
|
--user "Chewing_Bever:$GITEA_PASSWORD"
|
||||||
--upload-file landerctl/build/landerctl
|
--upload-file landerctl/build/landerctl
|
||||||
https://git.rustybever.be/api/packages/Chewing_Bever/generic/lander/"${CI_COMMIT_TAG}"/landerctl-"$(echo '${PLATFORM}' | sed 's:/:-:g')"
|
https://git.rustybever.be/api/packages/Chewing_Bever/generic/lander/"${CI_COMMIT_TAG}"/landerctl-"$(echo '${PLATFORM}' | sed 's:/:-:g')"
|
||||||
secrets:
|
|
||||||
- gitea_password
|
|
||||||
when:
|
when:
|
||||||
event: tag
|
event: tag
|
||||||
|
|
|
||||||
|
|
@ -7,8 +7,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
## [Unreleased](https://git.rustybever.be/Chewing_Bever/lander/src/branch/dev)
|
## [Unreleased](https://git.rustybever.be/Chewing_Bever/lander/src/branch/dev)
|
||||||
|
|
||||||
## [0.2.0](https://git.rustybever.be/Chewing_Bever/lander/src/tag/0.2.0)
|
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
||||||
* HTTP Loop
|
* HTTP Loop
|
||||||
|
|
|
||||||
17
README.md
17
README.md
|
|
@ -1,16 +1,5 @@
|
||||||
# Lander
|
# Lander
|
||||||
|
|
||||||
Lander is an HTTP/1.1 server that acts as a URL shortener, pastebin and
|
|
||||||
file-sharing service. It's written from the ground up in C, complete with an
|
|
||||||
HTTP framework built on top of an event loop implementation based on [Build
|
|
||||||
Your Own Redis with C/C++](https://build-your-own.org/redis/). Lookup of
|
|
||||||
entries is done using an in-memory trie data structure, and on-disk storage
|
|
||||||
uses a custom binary database format.
|
|
||||||
|
|
||||||
The codebase uses one thirdparty library, namely
|
|
||||||
[picohttpparser](https://github.com/h2o/picohttpparser) for parsing HTTP
|
|
||||||
requests.
|
|
||||||
|
|
||||||
## The idea
|
## The idea
|
||||||
|
|
||||||
A URL shortener has always been on my list of things I'd like to write myself.
|
A URL shortener has always been on my list of things I'd like to write myself.
|
||||||
|
|
@ -21,6 +10,12 @@ different tries (Patricia trie, ternary trie, and a custom one). Considering
|
||||||
these are efficient string-based search trees, this gave me the idea to use it
|
these are efficient string-based search trees, this gave me the idea to use it
|
||||||
as the backend for a URL shortener!
|
as the backend for a URL shortener!
|
||||||
|
|
||||||
|
This implementation currently uses a ternary trie as its search tree. The
|
||||||
|
persistence model is very simple; I simply append a line to a text file every
|
||||||
|
time a URL is added, and add the lines of this file to the trie on startup. The
|
||||||
|
trie is stored completely in memory, and no I/O operations are required when
|
||||||
|
requesting a redirect. This makes the server very fast.
|
||||||
|
|
||||||
## The name
|
## The name
|
||||||
|
|
||||||
I gave up giving my projects original names a long time ago, so now I just use
|
I gave up giving my projects original names a long time ago, so now I just use
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
VERSION := 0.2.0
|
VERSION := 0.1.0
|
||||||
|
|
||||||
BIN_FILENAME = lander
|
BIN_FILENAME = lander
|
||||||
|
|
||||||
|
|
@ -7,7 +7,7 @@ SRC_DIR = src
|
||||||
TEST_DIR = test
|
TEST_DIR = test
|
||||||
THIRDPARTY_DIR = thirdparty
|
THIRDPARTY_DIR = thirdparty
|
||||||
|
|
||||||
INC_DIRS = include $(THIRDPARTY_DIR)/include lsm/include
|
INC_DIRS = include $(THIRDPARTY_DIR)/include trie/include lsm/include
|
||||||
LIBS = m lsm
|
LIBS = m lsm
|
||||||
LIB_DIRS = ./lsm/build
|
LIB_DIRS = ./lsm/build
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,95 @@
|
||||||
|
# https://spin.atomicobject.com/2016/08/26/makefile-c-projects/ was a great
|
||||||
|
# base for this Makefile
|
||||||
|
|
||||||
|
-include config.mk
|
||||||
|
|
||||||
|
LIB := $(BUILD_DIR)/$(LIB_FILENAME)
|
||||||
|
|
||||||
|
SRCS != find '$(SRC_DIR)' -iname '*.c'
|
||||||
|
SRCS_H != find $(INC_DIRS) -iname '*.h'
|
||||||
|
SRCS_H_INTERNAL != find $(SRC_DIR) -iname '*.h'
|
||||||
|
SRCS_TEST != find '$(TEST_DIR)' -iname '*.c'
|
||||||
|
|
||||||
|
OBJS := $(SRCS:%=$(BUILD_DIR)/%.o)
|
||||||
|
OBJS_TEST := $(SRCS_TEST:%=$(BUILD_DIR)/%.o)
|
||||||
|
DEPS := $(SRCS:%=$(BUILD_DIR)/%.d) $(SRCS_TEST:%=$(BUILD_DIR)/%.d)
|
||||||
|
|
||||||
|
BINS_TEST := $(OBJS_TEST:%.c.o=%)
|
||||||
|
TARGETS_TEST := $(BINS_TEST:%=test-%)
|
||||||
|
TARGETS_MEM_TEST := $(BINS_TEST:%=test-mem-%)
|
||||||
|
|
||||||
|
_CFLAGS := $(addprefix -I,$(INC_DIRS)) $(CFLAGS) -Wall -Wextra
|
||||||
|
|
||||||
|
.PHONY: all
|
||||||
|
all: lib
|
||||||
|
|
||||||
|
|
||||||
|
# =====COMPILATION=====
|
||||||
|
# Utility used by the CI to lint
|
||||||
|
.PHONY: objs
|
||||||
|
objs: $(OBJS)
|
||||||
|
|
||||||
|
.PHONY: lib
|
||||||
|
lib: $(LIB)
|
||||||
|
$(LIB): $(OBJS)
|
||||||
|
ar -rcs $@ $(OBJS)
|
||||||
|
|
||||||
|
$(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c
|
||||||
|
mkdir -p $(dir $@)
|
||||||
|
$(CC) -c $(_CFLAGS) $< -o $@
|
||||||
|
|
||||||
|
|
||||||
|
# =====TESTING=====
|
||||||
|
.PHONY: test
|
||||||
|
test: $(TARGETS_TEST)
|
||||||
|
|
||||||
|
.PHONY: test-mem
|
||||||
|
test-mem: $(TARGETS_MEM_TEST)
|
||||||
|
|
||||||
|
.PHONY: $(TARGETS_TEST)
|
||||||
|
$(TARGETS_TEST): test-%: %
|
||||||
|
./$^
|
||||||
|
|
||||||
|
.PHONY: $(TARGETS_MEM_TEST)
|
||||||
|
$(TARGETS_MEM_TEST): test-mem-%: %
|
||||||
|
valgrind --tool=memcheck --error-exitcode=1 --track-origins=yes --leak-check=full ./$^
|
||||||
|
|
||||||
|
.PHONY: build-test
|
||||||
|
build-test: $(BINS_TEST)
|
||||||
|
|
||||||
|
$(BINS_TEST): %: %.c.o $(LIB)
|
||||||
|
$(CC) \
|
||||||
|
$^ -o $@
|
||||||
|
|
||||||
|
# Along with the include directory, each test includes $(TEST_DIR) (which
|
||||||
|
# contains the acutest.h header file), and the src directory of the module it's
|
||||||
|
# testing. This allows tests to access internal methods, which aren't publicly
|
||||||
|
# exposed.
|
||||||
|
$(BUILD_DIR)/$(TEST_DIR)/%.c.o: $(TEST_DIR)/%.c
|
||||||
|
mkdir -p $(dir $@)
|
||||||
|
$(CC) $(_CFLAGS) -I$(TEST_DIR) \
|
||||||
|
-I$(dir $(@:$(BUILD_DIR)/$(TEST_DIR)/%=$(SRC_DIR)/%)) \
|
||||||
|
-c $< -o $@
|
||||||
|
|
||||||
|
# =====MAINTENANCE=====
|
||||||
|
.PHONY: lint
|
||||||
|
lint:
|
||||||
|
clang-format -n --Werror $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL)
|
||||||
|
|
||||||
|
.PHONY: fmt
|
||||||
|
fmt:
|
||||||
|
clang-format -i $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL)
|
||||||
|
|
||||||
|
.PHONY: clean
|
||||||
|
clean:
|
||||||
|
rm -rf $(BUILD_DIR)
|
||||||
|
|
||||||
|
|
||||||
|
# Regenerate the compilation database with bear. The build is cleaned first so
# that every compiler invocation is actually executed and recorded. Sub-makes
# are started through $(MAKE) rather than a literal `make`, so the same make
# binary is used and flags such as -j / -n are passed on to them.
.PHONY: bear
bear: clean
	bear -- $(MAKE)
	bear --append -- $(MAKE) build-test
|
||||||
|
|
||||||
|
|
||||||
|
# Make make aware of the .d files
|
||||||
|
-include $(DEPS)
|
||||||
|
|
@ -0,0 +1,16 @@
|
||||||
|
# Trie design
|
||||||
|
|
||||||
|
The underlying data structure is based on a combination of a ternary and a
|
||||||
|
Patricia trie.
|
||||||
|
|
||||||
|
* Nodes are classic ternary trie nodes, meaning each node contains a binary
|
||||||
|
search tree
|
||||||
|
* Each node can define a skip, like a Patricia trie, of at most 8 characters.
|
||||||
|
These skipped characters are stored directly in the structs defining the
|
||||||
|
nodes.
|
||||||
|
* While the add function relies on the fact that the input is a NULL-terminated
|
||||||
|
C string, the trie itself does not store any NULL bytes.
|
||||||
|
|
||||||
|
The goal of this data structure is to be as optimized as possible for search
|
||||||
|
operations with short (usually < 8 characters) keys, as this is by far the most
|
||||||
|
common operation for a URL shortener/pastebin.
|
||||||
|
|
@ -0,0 +1,13 @@
|
||||||
|
LIB_FILENAME = libtrie.a
|
||||||
|
|
||||||
|
BUILD_DIR = build
|
||||||
|
SRC_DIR = src
|
||||||
|
TEST_DIR = test
|
||||||
|
INC_DIRS = include
|
||||||
|
|
||||||
|
# -MMD: generate a .d file for every source file. This file can be imported by
|
||||||
|
# make and makes make aware that a header file has been changed, ensuring an
|
||||||
|
# object file is also recompiled if only a header is changed.
|
||||||
|
# -MP: generate a dummy target for every header file (according to the docs it
|
||||||
|
# prevents some errors when removing header files)
|
||||||
|
CFLAGS = -MMD -MP -g
|
||||||
|
|
@ -0,0 +1,148 @@
|
||||||
|
#ifndef AD3_TERNARYTRIE
|
||||||
|
#define AD3_TERNARYTRIE
|
||||||
|
|
||||||
|
#define ALPHABET_SIZE 256
|
||||||
|
#define DELIMITER '\0'
|
||||||
|
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||||
|
|
||||||
|
// Should not be higher than 254 or stuff will break
|
||||||
|
#define TRIE_MAX_SKIP_SIZE 8
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The implementation of a Ternary Trie.
|
||||||
|
*
|
||||||
|
* Each node should be represented by a binary tree in order to reduce the
|
||||||
|
* memory usage.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
static const char charset[] =
|
||||||
|
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
|
||||||
|
static const size_t charset_len = sizeof(charset) - 1;
|
||||||
|
|
||||||
|
// Length of randomly generated keys
|
||||||
|
#define RANDOM_KEY_LENGTH_SHORT 4
|
||||||
|
#define RANDOM_KEY_LENGTH_LONG 16
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Type definition for the struct representing the current Trie.
|
||||||
|
*
|
||||||
|
* You can (and should) redefine this in your c-file with the concrete fields.
|
||||||
|
*/
|
||||||
|
typedef struct ttrie Trie;
|
||||||
|
|
||||||
|
typedef enum entry_type { Redirect, Paste, Unknown } EntryType;
|
||||||
|
|
||||||
|
typedef struct entry {
|
||||||
|
EntryType type;
|
||||||
|
char *string;
|
||||||
|
} Entry;
|
||||||
|
|
||||||
|
typedef enum trie_exit_code {
|
||||||
|
Ok = 0,
|
||||||
|
NotFound,
|
||||||
|
AlreadyPresent,
|
||||||
|
FileError
|
||||||
|
} TrieExitCode;
|
||||||
|
|
||||||
|
Entry *entry_new(EntryType type, const char *string);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate & initialize a new trie, and populate it with the data from the
|
||||||
|
* given data file.
|
||||||
|
*
|
||||||
|
* @return 0 if everything was successful, non-zero otherwise
|
||||||
|
*/
|
||||||
|
TrieExitCode trie_init(Trie **trie_ptr, const char *file_path);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* De-allocate a trie by freeing the memory occupied by this trie.
|
||||||
|
*
|
||||||
|
* @param trie which should be freed
|
||||||
|
*/
|
||||||
|
void trie_free(Trie *trie);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Search for an entry in the trie.
|
||||||
|
*
|
||||||
|
* @param trie
|
||||||
|
* @param entry_ptr pointer to Entry will be stored here, if found
|
||||||
|
* @param key key representing the entry
|
||||||
|
* @return 0 if the search was successful, 1 if not found
|
||||||
|
*/
|
||||||
|
TrieExitCode trie_search(Trie *trie, Entry **entry_ptr, const char *key);
|
||||||
|
|
||||||
|
TrieExitCode trie_search_len(Trie *trie, Entry **entry_ptr, const char *key,
|
||||||
|
size_t key_len);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a string to this trie.
|
||||||
|
*
|
||||||
|
* @param trie
|
||||||
|
* @param key key to represent entry with
|
||||||
|
* @param entry entry to add
|
||||||
|
* @return Ok if added, AlreadyPresent if the key is already in the trie, FileError if the data file could not be written
|
||||||
|
*/
|
||||||
|
TrieExitCode trie_add(Trie *trie, const char *key, Entry *entry);
|
||||||
|
|
||||||
|
TrieExitCode trie_add_len(Trie *trie, const char *key, size_t key_len,
|
||||||
|
Entry *entry);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add an entry by generating a random string as the key.
|
||||||
|
*
|
||||||
|
* @param trie
|
||||||
|
* @param entry entry to add
|
||||||
|
* @param secure whether to generate a longer, more secure random key
|
||||||
|
* @return Ok if the entry was added, in which case *key_ptr points to the
|
||||||
|
* generated key. That key remains valid after unlocking the trie and must be freed by the caller.
|
||||||
|
*/
|
||||||
|
TrieExitCode trie_add_random(Trie *trie, char **key_ptr, Entry *entry,
|
||||||
|
bool secure);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove an entry from this trie given its key.
|
||||||
|
*
|
||||||
|
* @param trie
|
||||||
|
* @param key key representing entry
|
||||||
|
* @return true if the entry was present and has been removed, false if it was
|
||||||
|
* not present
|
||||||
|
*/
|
||||||
|
bool trie_remove(Trie *trie, const char *key);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number of entries in this trie.
|
||||||
|
*
|
||||||
|
* @param trie
|
||||||
|
* @return the number of entries in this trie
|
||||||
|
*/
|
||||||
|
size_t trie_size(Trie *trie);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Acquire a read lock on the trie.
|
||||||
|
*
|
||||||
|
* @return 0 if successful, non-zero otherwise (return value of
|
||||||
|
* pthread_rwlock_rdlock)
|
||||||
|
*/
|
||||||
|
int trie_rlock(Trie *trie);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Acquire a write lock on the trie.
|
||||||
|
*
|
||||||
|
* @return 0 if successful, non-zero otherwise (return value of
|
||||||
|
* pthread_rwlock_wrlock)
|
||||||
|
*/
|
||||||
|
int trie_wlock(Trie *trie);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Release the lock on a trie after having acquired it beforehand.
|
||||||
|
*
|
||||||
|
* @return 0 if successful, non-zero otherwise (return value of
|
||||||
|
* pthread_rwlock_unlock)
|
||||||
|
*/
|
||||||
|
int trie_unlock(Trie *trie);
|
||||||
|
|
||||||
|
#endif // AD3_TERNARYTRIE
|
||||||
|
|
@ -0,0 +1,428 @@
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "trie.h"
|
||||||
|
#include "trie_entry.h"
|
||||||
|
#include "trie_node.h"
|
||||||
|
|
||||||
|
// Concrete definition of the opaque Trie type.
typedef struct ttrie {
|
||||||
|
TrieNode *root; // root node; allocated with tnode_init() in trie_init
|
||||||
|
size_t size; // number of stored entries; incremented on each successful add
|
||||||
|
char *file_path; // strdup'd path of the backing data file, or NULL when none is used
|
||||||
|
pthread_rwlock_t lock; // read/write lock taken through trie_rlock/trie_wlock/trie_unlock
|
||||||
|
} Trie;
|
||||||
|
|
||||||
|
TrieExitCode trie_add_no_lock(Trie *trie, const char *key, Entry *entry);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate and initialize an empty Trie
|
||||||
|
*
|
||||||
|
* @return pointer to the empty Trie
|
||||||
|
*/
|
||||||
|
TrieExitCode trie_init(Trie **trie_ptr, const char *file_path) {
|
||||||
|
// Allocate & initialize trie
|
||||||
|
Trie *trie = calloc(1, sizeof(Trie));
|
||||||
|
trie->root = tnode_init();
|
||||||
|
pthread_rwlock_init(&trie->lock, NULL);
|
||||||
|
|
||||||
|
if (file_path == NULL) {
|
||||||
|
trie->file_path = NULL;
|
||||||
|
*trie_ptr = trie;
|
||||||
|
return Ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
trie->file_path = strdup(file_path);
|
||||||
|
|
||||||
|
// Populate trie with data from file
|
||||||
|
FILE *fp = fopen(file_path, "r");
|
||||||
|
|
||||||
|
if (fp == NULL) {
|
||||||
|
return FileError;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We read in lines of at most 8192 characters (sounds like enough)
|
||||||
|
char buffer[8192];
|
||||||
|
EntryType type;
|
||||||
|
Entry *entry;
|
||||||
|
int i, j;
|
||||||
|
TrieExitCode status;
|
||||||
|
|
||||||
|
while (fgets(buffer, 8192, fp)) {
|
||||||
|
i = 0;
|
||||||
|
|
||||||
|
// Move index in buffer until we encounter first space character
|
||||||
|
while (buffer[i] != ' ') {
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Split the buffer into two strings, the key and the payload
|
||||||
|
buffer[i] = '\0';
|
||||||
|
|
||||||
|
type = entry_type_from_char(buffer[i + 1]);
|
||||||
|
|
||||||
|
// Skip type character & its surrounding spaces
|
||||||
|
j = i + 3;
|
||||||
|
|
||||||
|
// Now remove the newline character
|
||||||
|
while (buffer[j] != '\n') {
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
buffer[j] = '\0';
|
||||||
|
|
||||||
|
entry = entry_new(type, buffer + i + 3);
|
||||||
|
status = trie_add_no_lock(trie, buffer, entry);
|
||||||
|
|
||||||
|
if (status != Ok) {
|
||||||
|
trie_free(trie);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(fp);
|
||||||
|
|
||||||
|
*trie_ptr = trie;
|
||||||
|
|
||||||
|
return Ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* De-allocate a TernaryTree by freeing its entire underlying structure.
|
||||||
|
*
|
||||||
|
* @param trie trie to free
|
||||||
|
*/
|
||||||
|
void trie_free(Trie *trie) {
|
||||||
|
tnode_free(trie->root);
|
||||||
|
free(trie);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Result of trie_search_node_len: both fields stay NULL when the key is not found.
typedef struct searchresult {
|
||||||
|
TrieNode *parent; // node whose binary tree holds the final link to `child`
|
||||||
|
TrieNode *child; // node that represents the searched key
|
||||||
|
} SearchResult;
|
||||||
|
|
||||||
|
/**
 * Locate the node representing the first key_len bytes of key.
 *
 * Only key[0] .. key[key_len - 1] are ever read, so the key does not need to
 * be NUL-terminated.
 *
 * @param trie trie to search in
 * @param key key to look up
 * @param key_len number of bytes of key to use
 * @return the matching node in `child` and the node it hangs off in `parent`;
 * both are NULL if the key is not present
 */
SearchResult trie_search_node_len(Trie *trie, const char *key, size_t key_len) {
  SearchResult out = {NULL, NULL};

  // An empty key never matches; returning early also avoids reading key[0]
  if (key_len == 0) {
    return out;
  }

  size_t i = 0;
  TrieNode **node_ptr = &(trie->root);
  TrieNode **child_ptr;

  do {
    child_ptr = tnode_search(*node_ptr, key[i], false);

    // Either the character is not in this node's binary tree, or its slot
    // exists but no node has been attached to it
    if (child_ptr == NULL || *child_ptr == NULL) {
      return out;
    }

    i++;

    size_t skip_len = (*child_ptr)->string_len;

    // The skipped characters must fit inside what is left of the key; checking
    // this first keeps memcmp from reading past key + key_len
    if (skip_len > key_len - i ||
        memcmp((*child_ptr)->string, key + i, skip_len) != 0) {
      return out;
    }

    i += skip_len;

    if (i < key_len) {
      node_ptr = child_ptr;
    }
  } while (i < key_len);

  // The whole key has been consumed (i == key_len), so all that is left to
  // check is whether this node actually represents a stored key.
  if ((*child_ptr)->represents) {
    out.parent = *node_ptr;
    out.child = *child_ptr;
  }

  return out;
}
|
||||||
|
|
||||||
|
/**
 * Variant of trie_search_node_len for NUL-terminated keys; the key length is
 * taken from strlen.
 */
SearchResult trie_search_node(Trie *trie, const char *key) {
  const size_t key_len = strlen(key);

  return trie_search_node_len(trie, key, key_len);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether the given string is present in the trie.
|
||||||
|
*
|
||||||
|
* @param trie trie to look in
|
||||||
|
* @param string string to look up
|
||||||
|
* @return true if the string is present in the trie, false otherwise
|
||||||
|
*/
|
||||||
|
TrieExitCode trie_search_len(Trie *trie, Entry **entry_ptr, const char *key,
|
||||||
|
size_t key_len) {
|
||||||
|
SearchResult res = trie_search_node_len(trie, key, key_len);
|
||||||
|
|
||||||
|
if (res.child == NULL) {
|
||||||
|
return NotFound;
|
||||||
|
}
|
||||||
|
|
||||||
|
*entry_ptr = res.child->entry;
|
||||||
|
|
||||||
|
return Ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Variant of trie_search_len for NUL-terminated keys; the key length is taken
 * from strlen.
 */
TrieExitCode trie_search(Trie *trie, Entry **entry_ptr, const char *key) {
  const size_t key_len = strlen(key);

  return trie_search_len(trie, entry_ptr, key, key_len);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add the given string to the Trie.
|
||||||
|
*
|
||||||
|
* @param trie trie to add string to
|
||||||
|
* @param string string to add
|
||||||
|
* @return true if the string wasn't present in the trie and thus added, false
|
||||||
|
* otherwise
|
||||||
|
*/
|
||||||
|
TrieExitCode trie_add_len_no_lock(Trie *trie, const char *key, size_t key_len,
|
||||||
|
Entry *entry) {
|
||||||
|
size_t i = 0;
|
||||||
|
uint8_t offset;
|
||||||
|
TrieNode **node_ptr = &(trie->root);
|
||||||
|
TrieNode **child_node_ptr;
|
||||||
|
TrieNode *child_node;
|
||||||
|
|
||||||
|
do {
|
||||||
|
offset = 0;
|
||||||
|
child_node_ptr = tnode_search(*node_ptr, key[i], true);
|
||||||
|
|
||||||
|
i++;
|
||||||
|
|
||||||
|
// We've reached a NULL child, so we add the remaining part of the string
|
||||||
|
// here
|
||||||
|
if (*child_node_ptr == NULL) {
|
||||||
|
child_node = tnode_init();
|
||||||
|
|
||||||
|
while (offset < TRIE_MAX_SKIP_SIZE && i + offset < key_len) {
|
||||||
|
offset++;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(child_node->string, key + i, offset);
|
||||||
|
|
||||||
|
child_node->string_len = offset;
|
||||||
|
*child_node_ptr = child_node;
|
||||||
|
|
||||||
|
// If the remaining part of the string is still longer than the maximum
|
||||||
|
// allowed skip length, we continue through the loop. The next iteration
|
||||||
|
// will enter this if statement again, and perform the same loop, until
|
||||||
|
// the string is fully added to the trie.
|
||||||
|
if (i + offset < key_len) {
|
||||||
|
node_ptr = child_node_ptr;
|
||||||
|
i += offset;
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
child_node->represents = true;
|
||||||
|
child_node->entry = entry;
|
||||||
|
|
||||||
|
trie->size++;
|
||||||
|
return Ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (offset < (*child_node_ptr)->string_len) {
|
||||||
|
// String no longer aligns with edge, so we have to split
|
||||||
|
if (key[i + offset] != (*child_node_ptr)->string[offset]) {
|
||||||
|
TrieNode *split_node = tnode_init();
|
||||||
|
child_node = *child_node_ptr;
|
||||||
|
|
||||||
|
// New string of the split node is the prefix that we were able
|
||||||
|
// to skip
|
||||||
|
if (offset > 0) {
|
||||||
|
memcpy(split_node->string, child_node->string, offset);
|
||||||
|
split_node->string_len = offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
// split_node replaces child_node as the child of node
|
||||||
|
*child_node_ptr = split_node;
|
||||||
|
TrieNode **new_node_ptr =
|
||||||
|
tnode_search(split_node, child_node->string[offset], true);
|
||||||
|
*new_node_ptr = child_node;
|
||||||
|
|
||||||
|
// child_node has now become a child of split_node, so we update its
|
||||||
|
// string accordingely by removing the skipped prefix + the one
|
||||||
|
// character that's already stored by being a child of split_node
|
||||||
|
/* char *old_string = child_node->string.ptr; */
|
||||||
|
uint8_t new_skip_len = child_node->string_len - (offset + 1);
|
||||||
|
|
||||||
|
if (new_skip_len > 0) {
|
||||||
|
char old_string[TRIE_MAX_SKIP_SIZE];
|
||||||
|
memcpy(old_string, child_node->string + offset + 1, new_skip_len);
|
||||||
|
memcpy(child_node->string, old_string, new_skip_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
child_node->string_len = new_skip_len;
|
||||||
|
|
||||||
|
// The while loop will exit either way after this has happened, as
|
||||||
|
// child_node is now split_node and split_node's len is already set to
|
||||||
|
// offset.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset++;
|
||||||
|
}
|
||||||
|
|
||||||
|
node_ptr = child_node_ptr;
|
||||||
|
|
||||||
|
i += offset;
|
||||||
|
} while (i < key_len);
|
||||||
|
|
||||||
|
if ((*child_node_ptr)->represents) {
|
||||||
|
return AlreadyPresent;
|
||||||
|
}
|
||||||
|
|
||||||
|
(*child_node_ptr)->represents = true;
|
||||||
|
(*child_node_ptr)->entry = entry;
|
||||||
|
trie->size++;
|
||||||
|
return Ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Variant of trie_add_len_no_lock for NUL-terminated keys; the key length is
 * taken from strlen.
 */
TrieExitCode trie_add_no_lock(Trie *trie, const char *key, Entry *entry) {
  const size_t key_len = strlen(key);

  return trie_add_len_no_lock(trie, key, key_len, entry);
}
|
||||||
|
|
||||||
|
TrieExitCode trie_add_len(Trie *trie, const char *key, size_t key_len,
|
||||||
|
Entry *entry) {
|
||||||
|
if (trie->file_path != NULL) {
|
||||||
|
// Easiest way to make sure we don't add duplicate entries
|
||||||
|
// We use an internal function that doesn't require a read lock, as we're
|
||||||
|
// already inside a write lock
|
||||||
|
if (trie_search_node_len(trie, key, key_len).child != NULL) {
|
||||||
|
return AlreadyPresent;
|
||||||
|
}
|
||||||
|
|
||||||
|
FILE *fp = fopen(trie->file_path, "a");
|
||||||
|
|
||||||
|
if (fp == NULL) {
|
||||||
|
return FileError;
|
||||||
|
}
|
||||||
|
|
||||||
|
fputs(key, fp);
|
||||||
|
fputs(" ", fp);
|
||||||
|
fputc(entry_type_to_char(entry->type), fp);
|
||||||
|
fputs(" ", fp);
|
||||||
|
fputs(entry->string, fp);
|
||||||
|
fputs("\n", fp);
|
||||||
|
|
||||||
|
fclose(fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function *should* always return Ok. Otherwise, the function would've
|
||||||
|
// exited because the string was found in the trie.
|
||||||
|
return trie_add_len_no_lock(trie, key, key_len, entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Variant of trie_add_len for NUL-terminated keys; the key length is taken
 * from strlen.
 */
TrieExitCode trie_add(Trie *trie, const char *key, Entry *entry) {
  const size_t key_len = strlen(key);

  return trie_add_len(trie, key, key_len, entry);
}
|
||||||
|
|
||||||
|
/**
 * Store an entry under a freshly generated random key made of characters from
 * charset.
 *
 * @param trie trie to add the entry to
 * @param key_ptr on Ok, receives the heap-allocated, NUL-terminated key
 * @param entry entry to add
 * @param secure selects RANDOM_KEY_LENGTH_LONG instead of
 * RANDOM_KEY_LENGTH_SHORT as the key length
 * @return the result of trie_add for the generated key
 */
TrieExitCode trie_add_random(Trie *trie, char **key_ptr, Entry *entry,
                             bool secure) {
  int key_length = RANDOM_KEY_LENGTH_SHORT;

  if (secure) {
    key_length = RANDOM_KEY_LENGTH_LONG;
  }

  char *key = malloc(key_length + 1);
  key[key_length] = '\0';

  // Keep drawing keys until one turns up that the trie doesn't contain yet.
  // As long as charset_len ** key_length is large, collisions are rare and
  // this loop almost always runs exactly once.
  do {
    for (char *cursor = key; cursor < key + key_length; cursor++) {
      *cursor = charset[rand() % charset_len];
    }
  } while (trie_search_node(trie, key).child != NULL);

  TrieExitCode return_value = trie_add(trie, key, entry);

  if (return_value != Ok) {
    // Nobody will receive the key, so release it here
    free(key);

    return return_value;
  }

  *key_ptr = key;

  return return_value;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove the given string from a Trie.
|
||||||
|
*
|
||||||
|
* @param trie trie to remove string from
|
||||||
|
* @param string string to remove
|
||||||
|
* @return true if the string was in the trie and thus removed, false otherwise
|
||||||
|
*/
|
||||||
|
/* bool trie_remove(Trie *trie, const char *string) { */
|
||||||
|
/* pthread_rwlock_wrlock(&trie->lock); */
|
||||||
|
|
||||||
|
/* bool return_value = false; */
|
||||||
|
|
||||||
|
/* SearchResult res = trie_search_node(trie, string); */
|
||||||
|
|
||||||
|
/* if (res.child == NULL) { */
|
||||||
|
/* goto end; */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
/* trie->size--; */
|
||||||
|
/* return_value = true; */
|
||||||
|
|
||||||
|
/* if (res.parent != NULL) { */
|
||||||
|
/* // We're removing a full leaf, so we calculate the offset of the
|
||||||
|
* character */
|
||||||
|
/* // to remove from the parent */
|
||||||
|
/* if (res.child->type == 2) { */
|
||||||
|
/* size_t str_len = strlen(string); */
|
||||||
|
/* size_t suffix_len = strlen(res.child->ptr.string); */
|
||||||
|
|
||||||
|
/* tnode_remove(res.parent, string[str_len - suffix_len - 1]); */
|
||||||
|
/* } */
|
||||||
|
/* // In the other case, the character to remove from the parent is the last
|
||||||
|
*/
|
||||||
|
/* // character of the string */
|
||||||
|
/* else if (res.child->size == 0) { */
|
||||||
|
/* size_t i = 0; */
|
||||||
|
|
||||||
|
/* while (string[i + 1] != DELIMITER) { */
|
||||||
|
/* i++; */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
/* tnode_remove(res.parent, string[i]); */
|
||||||
|
/* } else { */
|
||||||
|
/* res.child->type = 0; */
|
||||||
|
|
||||||
|
/* goto end; */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
/* tnode_free(res.child); */
|
||||||
|
/* } */
|
||||||
|
/* // We're in the root here */
|
||||||
|
/* else { */
|
||||||
|
/* res.child->type = 0; */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
/* end: */
|
||||||
|
/* pthread_rwlock_unlock(&trie->lock); */
|
||||||
|
|
||||||
|
/* return return_value; */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the current size of the given trie.
|
||||||
|
*
|
||||||
|
* @param trie trie to return size for
|
||||||
|
* @return size of the trie
|
||||||
|
*/
|
||||||
|
size_t trie_size(Trie *trie) { return trie->size; }
|
||||||
|
|
||||||
|
/* Take a read lock on the trie; returns the result of pthread_rwlock_rdlock. */
int trie_rlock(Trie *trie) {
  pthread_rwlock_t *lock = &trie->lock;

  return pthread_rwlock_rdlock(lock);
}
|
||||||
|
|
||||||
|
/* Take a write lock on the trie; returns the result of pthread_rwlock_wrlock. */
int trie_wlock(Trie *trie) {
  pthread_rwlock_t *lock = &trie->lock;

  return pthread_rwlock_wrlock(lock);
}
|
||||||
|
|
||||||
|
/* Release the trie's lock; returns the result of pthread_rwlock_unlock. */
int trie_unlock(Trie *trie) {
  pthread_rwlock_t *lock = &trie->lock;

  return pthread_rwlock_unlock(lock);
}
|
||||||
|
|
@ -0,0 +1,37 @@
|
||||||
|
#include "trie_entry.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
EntryType entry_type_from_char(char c) {
|
||||||
|
switch (c) {
|
||||||
|
case '0':
|
||||||
|
return Redirect;
|
||||||
|
case '1':
|
||||||
|
return Paste;
|
||||||
|
default:
|
||||||
|
return Unknown;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Encode an entry type as its one-character tag: Redirect is '0', Paste is
 * '1', and any other value yields '\0'. */
char entry_type_to_char(EntryType et) {
  if (et == Redirect) {
    return '0';
  }

  if (et == Paste) {
    return '1';
  }

  return '\0';
}
|
||||||
|
|
||||||
|
/* Allocate an Entry of the given type. The string is duplicated with strdup,
 * so the entry holds its own copy; a NULL string is stored as NULL. */
Entry *entry_new(EntryType type, const char *string) {
  Entry *entry = malloc(sizeof(Entry));

  entry->type = type;
  entry->string = (string == NULL) ? NULL : strdup(string);

  return entry;
}
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
#include "trie.h"
|
||||||
|
|
||||||
|
EntryType entry_type_from_char(char c);
|
||||||
|
|
||||||
|
char entry_type_to_char(EntryType et);
|
||||||
|
|
||||||
|
Entry *entry_new(EntryType type, const char *string);
|
||||||
|
|
@ -0,0 +1,265 @@
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "trie_node.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate and initialize a new TrieInnerNode representing a given
|
||||||
|
* character.
|
||||||
|
*
|
||||||
|
* @param c character to represent
|
||||||
|
* @return pointer to newly allocated struct
|
||||||
|
*/
|
||||||
|
TrieInnerNode *tinode_init(char c) {
|
||||||
|
TrieInnerNode *node = calloc(1, sizeof(TrieInnerNode));
|
||||||
|
node->key = c;
|
||||||
|
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate and initialize a new TrieNode.
|
||||||
|
*
|
||||||
|
* @return pointer to newly allocated struct
|
||||||
|
*/
|
||||||
|
TrieNode *tnode_init() {
|
||||||
|
TrieNode *node = malloc(sizeof(TrieNode));
|
||||||
|
|
||||||
|
node->tree_size = 0;
|
||||||
|
node->string_len = 0;
|
||||||
|
node->represents = false;
|
||||||
|
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free a TrieInnerNode and its underlying tree structure. This should
|
||||||
|
* usually only be called on the root of a binary tree to free the entire
|
||||||
|
* structure.
|
||||||
|
*
|
||||||
|
* @param node node whose tree to free
|
||||||
|
*/
|
||||||
|
void tinode_free_cascade(TrieInnerNode *node) {
|
||||||
|
if (node->left != NULL) {
|
||||||
|
tinode_free_cascade(node->left);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node->right != NULL) {
|
||||||
|
tinode_free_cascade(node->right);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node->next != NULL) {
|
||||||
|
tnode_free(node->next);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free a TrieNode and its underlying tree structure.
|
||||||
|
*
|
||||||
|
* @param node node to free
|
||||||
|
*/
|
||||||
|
void tnode_free(TrieNode *node) {
|
||||||
|
if (node->tree_size > 0) {
|
||||||
|
tinode_free_cascade(node->tree);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO properly free entry
|
||||||
|
/* if (node->payload != NULL) { */
|
||||||
|
/* free(node->payload); */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
free(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function performs a lookup in the underlying binary tree of the given
|
||||||
|
* TrieNode. If found, the return value is a pointer to the memory
|
||||||
|
* location where the TrieInnerNode representing the given character
|
||||||
|
* stores its `next` field. If not found, the return value is NULL, unless
|
||||||
|
* `create` is true.
|
||||||
|
*
|
||||||
|
* NOTE: a non-NULL return value does not mean that the dereferenced value is
|
||||||
|
* also not NULL. In particular, if `create` is set to true and the function had
|
||||||
|
* to create the new node, the dereferenced value will always be NULL.
|
||||||
|
*
|
||||||
|
* @param node node to perform lookup in. If node is a full leaf, the return
|
||||||
|
* value will always be NULL, regardless of the value of create.
|
||||||
|
* @param create whether to create the TrieInnerNode if it isn't present
|
||||||
|
* yet. If this is set to true, the function will never return NULL unless the
|
||||||
|
* node represents a leaf with a string, because the struct and therefore the
|
||||||
|
* address is created if it doesn't exist yet.
|
||||||
|
*/
|
||||||
|
TrieNode **tnode_search(TrieNode *node, const char c, bool create) {
|
||||||
|
// It can happen that the node has no initialized root yet
|
||||||
|
if (node->tree_size == 0) {
|
||||||
|
if (create) {
|
||||||
|
node->tree_size++;
|
||||||
|
node->tree = tinode_init(c);
|
||||||
|
|
||||||
|
return &node->tree->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
TrieInnerNode *parent = node->tree;
|
||||||
|
TrieInnerNode *child;
|
||||||
|
|
||||||
|
// Iterate through the tree until we either find the character or realize it's
|
||||||
|
// not present in the tree
|
||||||
|
// FIXME don't use while (1)
|
||||||
|
while (1) {
|
||||||
|
if (parent->key == c) {
|
||||||
|
return &parent->next;
|
||||||
|
} else if (c < parent->key) {
|
||||||
|
child = parent->left;
|
||||||
|
} else {
|
||||||
|
child = parent->right;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (child == NULL) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
parent = child;
|
||||||
|
};
|
||||||
|
|
||||||
|
// child is NULL, meaning the character isn't in the binary tree yet.
|
||||||
|
|
||||||
|
// If create is true, we create the new node so that we can still return a
|
||||||
|
// non-NULL pointer.
|
||||||
|
if (create) {
|
||||||
|
TrieInnerNode *new_node = tinode_init(c);
|
||||||
|
|
||||||
|
if (c < parent->key) {
|
||||||
|
parent->left = new_node;
|
||||||
|
} else {
|
||||||
|
parent->right = new_node;
|
||||||
|
}
|
||||||
|
|
||||||
|
node->tree_size++;
|
||||||
|
|
||||||
|
return &new_node->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Split a remaining string leaf node in two. This function assumes it receives
|
||||||
|
* a full leaf as its input.
|
||||||
|
*
|
||||||
|
* @param node node to split
|
||||||
|
*/
|
||||||
|
/* void tnode_split(TrieNode *node) { */
|
||||||
|
/* TrieNode *new_node = tnode_init(); */
|
||||||
|
/* char key = node->ptr.string[0]; */
|
||||||
|
|
||||||
|
/* // There's a chance the remaining string was only 1 character, meaning the
|
||||||
|
* new */
|
||||||
|
/* // node doesn't have to store a string */
|
||||||
|
/* if (node->ptr.string[1] != DELIMITER) { */
|
||||||
|
/* tnode_set_string(new_node, node->ptr.string + 1); */
|
||||||
|
/* } else { */
|
||||||
|
/* new_node->type = 1; */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
/* new_node->entry = node->entry; */
|
||||||
|
|
||||||
|
/* node->type = 0; */
|
||||||
|
/* node->size = 0; */
|
||||||
|
/* node->entry = NULL; */
|
||||||
|
|
||||||
|
/* free(node->ptr.string); */
|
||||||
|
/* node->ptr.string = NULL; */
|
||||||
|
|
||||||
|
/* // Initialize node's binary tree with the correct character */
|
||||||
|
/* TrieNode **node_ptr = tnode_search(node, key, true); */
|
||||||
|
/* *node_ptr = new_node; */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Remove the given character from a TrieInnerNode's subtree. The
|
||||||
|
* function assumes the character is indeed in the subtree.
|
||||||
|
*/
|
||||||
|
void tinode_remove(TrieInnerNode *node, const char c) {
|
||||||
|
TrieInnerNode **to_remove_ptr = &node;
|
||||||
|
|
||||||
|
// We use pointers to pointers here so we can later free the removed node
|
||||||
|
// without having to know what its parent is
|
||||||
|
while ((*to_remove_ptr)->key != c) {
|
||||||
|
to_remove_ptr = (c < (*to_remove_ptr)->key) ? &(*to_remove_ptr)->left
|
||||||
|
: &(*to_remove_ptr)->right;
|
||||||
|
};
|
||||||
|
|
||||||
|
// If the node isn't a leaf, we have to replace it with another
|
||||||
|
if ((*to_remove_ptr)->left != NULL || (*to_remove_ptr)->right != NULL) {
|
||||||
|
TrieInnerNode *to_replace = *to_remove_ptr;
|
||||||
|
|
||||||
|
// Replace with its only right child
|
||||||
|
if (to_replace->left == NULL) {
|
||||||
|
TrieInnerNode *to_remove = to_replace->right;
|
||||||
|
|
||||||
|
to_replace->key = to_remove->key;
|
||||||
|
to_replace->next = to_remove->next;
|
||||||
|
to_replace->left = to_remove->left;
|
||||||
|
to_replace->right = to_remove->right;
|
||||||
|
|
||||||
|
free(to_remove);
|
||||||
|
}
|
||||||
|
// Replace with its only left child
|
||||||
|
else if (to_replace->right == NULL) {
|
||||||
|
TrieInnerNode *to_remove = to_replace->left;
|
||||||
|
|
||||||
|
to_replace->key = to_remove->key;
|
||||||
|
to_replace->next = to_remove->next;
|
||||||
|
to_replace->left = to_remove->left;
|
||||||
|
to_replace->right = to_remove->right;
|
||||||
|
|
||||||
|
free(to_remove);
|
||||||
|
}
|
||||||
|
// Node has two children, so replace with successor
|
||||||
|
else {
|
||||||
|
TrieInnerNode *to_remove_parent = to_replace;
|
||||||
|
TrieInnerNode *to_remove = to_replace->right;
|
||||||
|
|
||||||
|
while (to_remove->left != NULL) {
|
||||||
|
to_remove_parent = to_remove;
|
||||||
|
to_remove = to_remove->left;
|
||||||
|
}
|
||||||
|
|
||||||
|
to_replace->key = to_remove->key;
|
||||||
|
to_replace->next = to_remove->next;
|
||||||
|
|
||||||
|
if (to_remove_parent != to_replace) {
|
||||||
|
to_remove_parent->left = to_remove->right;
|
||||||
|
} else {
|
||||||
|
to_remove_parent->right = to_remove->right;
|
||||||
|
}
|
||||||
|
|
||||||
|
free(to_remove);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// We're the leaf, so we free ourselves
|
||||||
|
else {
|
||||||
|
free(*to_remove_ptr);
|
||||||
|
*to_remove_ptr = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove the given character from a TrieNode, respecting the rules
|
||||||
|
* of a binary search tree. This function assumes the character is in the search
|
||||||
|
* tree.
|
||||||
|
*
|
||||||
|
* @param node node to remove character from
|
||||||
|
* @param c character to remove
|
||||||
|
*/
|
||||||
|
void tnode_remove(TrieNode *node, const char c) {
|
||||||
|
tinode_remove(node->tree, c);
|
||||||
|
|
||||||
|
node->tree_size--;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,53 @@
|
||||||
|
#ifndef LANDER_TRIE_NODE
#define LANDER_TRIE_NODE

#include <stdbool.h>
#include <stdint.h>

#include "trie.h"

/**
 * Represents a node of the binary tree contained within each non-leaf
 * TrieNode.
 */
typedef struct tinode {
  // Children in the binary search tree, ordered by `key`
  struct tinode *left;
  struct tinode *right;
  // Trie node reached by following `key`
  struct tnode *next;
  // Character this inner node represents
  char key;
} TrieInnerNode;

/**
 * Represents a node inside a Trie. A node can be in one of three states:
 * - Internal node: a node that's part of a path to a leaf node. This node
 *   has a non-empty binary tree and an initialized root.
 *   NOTE(review): the previous wording said "a size greater than one";
 *   confirm the intended bound.
 * - Leaf: a node solely used to represent a string ending there. Its size is
 *   0 and represents is true.
 * - Full leaf: a leaf node that contains a string. This occurs when a string
 *   is added whose path is not fully in the tree yet, causing its remaining
 *   suffix to be stored as a single node. Its size will be zero, represents
 *   is true, and its string is set.
 */
typedef struct tnode {
  Entry *entry;

  // Root of the binary tree of child characters, and its number of nodes
  TrieInnerNode *tree;
  uint8_t tree_size;

  // Skips are at most TRIE_MAX_SKIP_SIZE characters, and are stored in the
  // nodes
  char string[TRIE_MAX_SKIP_SIZE];
  uint8_t string_len;

  bool represents;
} TrieNode;

TrieInnerNode *tinode_init(char c);

TrieNode *tnode_init();

void tinode_free_cascade(TrieInnerNode *node);

void tnode_free(TrieNode *node);

TrieNode **tnode_search(TrieNode *node, const char c, bool create);

void tinode_remove(TrieInnerNode *node, const char c);

void tnode_remove(TrieNode *node, const char c);

#endif
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,205 @@
|
||||||
|
#ifndef AD3_FUZZYTEST
|
||||||
|
#define AD3_FUZZYTEST
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "trie.h"
|
||||||
|
|
||||||
|
// Parameters of a single fuzzy test run. The field order matters: callers
// initialize this struct positionally.
struct fuzzyconfig {
  int seed;         // value passed to srand()
  int word_length;  // size of each generated string buffer
  int word_count;   // number of strings to generate
};

typedef struct fuzzyconfig FuzzyConfig;
|
||||||
|
|
||||||
|
/**
 * Fill a buffer with a random-length, NUL-terminated string consisting only
 * of characters from a fixed alphanumeric charset (plus ',' and '?').
 *
 * The generated string has between 0 and len - 2 characters. A buffer of
 * size 1 receives the empty string, and a non-positive size leaves the
 * buffer untouched.
 *
 * @param s buffer to write to
 * @param len size of the buffer, including room for the terminating NUL
 */
void random_clean_string(char* s, int len) {
  static const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,?";
  const int charset_len = (int)(sizeof(charset) - 1);

  // Nothing can be written into a buffer without room for the NUL byte
  if (len <= 0) {
    return;
  }

  // rand() % (len - 1) would divide by zero here; only "" fits anyway
  if (len == 1) {
    s[0] = '\0';
    return;
  }

  // len - 1 ensures that we can still set the null byte for the final byte
  int actual_len = rand() % (len - 1);

  for (int i = 0; i < actual_len; i++) {
    s[i] = charset[rand() % charset_len];
  }

  s[actual_len] = '\0';
}
|
||||||
|
|
||||||
|
/**
 * Fill a buffer with random bytes and terminate it with a NUL byte.
 *
 * The first byte is never NUL, so for len >= 2 the resulting C string is
 * never empty. Later bytes may be NUL, so the C string can be shorter than
 * len - 1. A buffer of size 1 can only hold the empty string, and a
 * non-positive size leaves the buffer untouched.
 *
 * @param s buffer to write to
 * @param len size of the buffer, including room for the terminating NUL
 */
void random_string(char* s, int len) {
  // Without this guard, s[0] and s[len - 1] would be written out of bounds
  if (len <= 0) {
    return;
  }

  int val = rand();

  // String can't be an empty string as they aren't supported
  s[0] = (char)(val % 255 + 1);

  for (int i = 1; i < len - 1; i++) {
    val = rand();
    s[i] = (char)(val % 256);
  }

  // Just in case no null characters were created
  s[len - 1] = '\0';
}
|
||||||
|
|
||||||
|
/**
 * Fill every row of a string matrix with a random string.
 *
 * @param s array of `count` buffers, each of size `len`
 * @param count number of rows
 * @param len size of each row buffer
 */
void random_string_matrix(char** s, int count, int len) {
  int row = 0;

  while (row < count) {
    random_string(s[row], len);
    row++;
  }
}
|
||||||
|
|
||||||
|
/**
 * Allocate a matrix of `count` zero-filled character buffers of `len` bytes
 * each. The caller frees every row and then the matrix itself.
 *
 * @param count number of rows
 * @param len size of each row in bytes
 * @return the allocated matrix, or NULL if an allocation failed (in which
 * case everything allocated so far has been released)
 */
char** init_string_matrix(int count, int len) {
  char** matrix = malloc(count * sizeof(char*));

  if (matrix == NULL) {
    return NULL;
  }

  for (int i = 0; i < count; i++) {
    matrix[i] = calloc(len, sizeof(char));

    // calloc may legitimately return NULL for a zero-sized request, so only
    // a positive len is treated as a failure
    if (matrix[i] == NULL && len > 0) {
      while (i-- > 0) {
        free(matrix[i]);
      }

      free(matrix);

      return NULL;
    }
  }

  return matrix;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test a given trie implementation using randomly generated strings generated
|
||||||
|
* using a given seed.
|
||||||
|
*
|
||||||
|
* @param seed seed to use for generating random strings
|
||||||
|
* @param count how many strings to test with
|
||||||
|
* @param len maximum length of each string
|
||||||
|
* @param init_func function to creat a new trie of the wanted type
|
||||||
|
* @param free_func function to free the given trie
|
||||||
|
* @param add_func function to add a string to the given trie
|
||||||
|
* @param remove_func function to remove a string from the given trie
|
||||||
|
* @param size_func function to get the size of the given trie
|
||||||
|
* @return exit code describing failures, if any
|
||||||
|
*/
|
||||||
|
int fuzzy_test_trie_seed(FuzzyConfig conf) {
|
||||||
|
srand(conf.seed);
|
||||||
|
|
||||||
|
char** matrix = init_string_matrix(conf.word_count, conf.word_length);
|
||||||
|
random_string_matrix(matrix, conf.word_count, conf.word_length);
|
||||||
|
bool* contains = calloc(conf.word_count, sizeof(bool));
|
||||||
|
|
||||||
|
// It's possible that the string matrix contains duplicate strings
|
||||||
|
bool** contains_dedupped = calloc(conf.word_count, sizeof(bool*));
|
||||||
|
|
||||||
|
for (int i = 0; i < conf.word_count; i++) {
|
||||||
|
if (contains_dedupped[i] == NULL) {
|
||||||
|
contains_dedupped[i] = contains + i;
|
||||||
|
|
||||||
|
for (int j = i + 1; j < conf.word_count; j++) {
|
||||||
|
if (strcmp(matrix[i], matrix[j]) == 0) {
|
||||||
|
contains_dedupped[j] = contains + i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We keep track of the size as well so that we can check whether this is
|
||||||
|
// also correct
|
||||||
|
size_t size = 0;
|
||||||
|
|
||||||
|
Trie *ct;
|
||||||
|
trie_init(&ct, NULL);
|
||||||
|
|
||||||
|
bool changed;
|
||||||
|
TrieExitCode status;
|
||||||
|
|
||||||
|
// 0: success
|
||||||
|
// 1: invalid add
|
||||||
|
// 2: invalid remove
|
||||||
|
// 3: bad size after adds
|
||||||
|
// 4: bad size after removes
|
||||||
|
int exit_code = 0;
|
||||||
|
|
||||||
|
// Add all strings to trie, checking for duplicates
|
||||||
|
for (int i = 0; i < conf.word_count; i++) {
|
||||||
|
status = trie_add(ct, matrix[i], NULL);
|
||||||
|
|
||||||
|
// if changed is false, *contains_dedupped[i] should be true, as changed
|
||||||
|
// can only be false if the string is already contained in the trie. if
|
||||||
|
// changed is true, *contains_dedupped[i] should be false, as the string
|
||||||
|
// cannot be in the trie yet.
|
||||||
|
if (status == Ok && *contains_dedupped[i]) {
|
||||||
|
exit_code = 1;
|
||||||
|
goto END;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!*contains_dedupped[i]) {
|
||||||
|
*contains_dedupped[i] = true;
|
||||||
|
size++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure size is correct
|
||||||
|
if (trie_size(ct) != size) {
|
||||||
|
printf("%i %i\n", trie_size(ct), size);
|
||||||
|
exit_code = 3;
|
||||||
|
goto END;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove all strings again, again taking duplicates into consideration
|
||||||
|
/* for (int i = 0; i < conf.word_count; i++) { */
|
||||||
|
/* changed = remove_func(ct, matrix[i]); */
|
||||||
|
|
||||||
|
/* // The string shouldn't be in the trie, yet another add operation */
|
||||||
|
/* // says it added it as well */
|
||||||
|
/* if (changed != *contains_dedupped[i]) { */
|
||||||
|
/* exit_code = 2; */
|
||||||
|
/* goto END; */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
/* if (*contains_dedupped[i]) { */
|
||||||
|
/* *contains_dedupped[i] = false; */
|
||||||
|
/* size--; */
|
||||||
|
/* } */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
// Finally, check that the trie is completely empty
|
||||||
|
/* if (size_func(ct) != 0) { */
|
||||||
|
/* exit_code = 4; */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
END:
|
||||||
|
trie_free(ct);
|
||||||
|
|
||||||
|
// Even testing functions should properly free memory
|
||||||
|
free(contains);
|
||||||
|
free(contains_dedupped);
|
||||||
|
|
||||||
|
for (int i = 0; i < conf.word_count; i++) {
|
||||||
|
free(matrix[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(matrix);
|
||||||
|
|
||||||
|
return exit_code;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Same as fuzzy_test_trie_seed, except that the seed is randomly generated.
|
||||||
|
*
|
||||||
|
* @param count how many strings to test with
|
||||||
|
* @param len maximum length of each string
|
||||||
|
* @param init_func function to creat a new trie of the wanted type
|
||||||
|
* @param free_func function to free the given trie
|
||||||
|
* @param add_func function to add a string to the given trie
|
||||||
|
* @param remove_func function to remove a string from the given trie
|
||||||
|
* @param size_func function to get the size of the given trie
|
||||||
|
* @return the generated seed if the test wasn't successful, -1 otherwise.
|
||||||
|
*/
|
||||||
|
/* int fuzzy_test_trie(int count, int len, void* (*init_func) (), void (*free_func) (void*), bool (*add_func) (void*, char*), bool (*remove_func) (void*, char*), int (*size_func) (void*)) { */
|
||||||
|
/* int seed = rand(); */
|
||||||
|
/* bool succeeded = fuzzy_test_trie_seed(seed, count, len, init_func, free_func, add_func, remove_func, size_func); */
|
||||||
|
|
||||||
|
/* if (!succeeded) { */
|
||||||
|
/* return seed; */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
/* return -1; */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,189 @@
|
||||||
|
#include "test.h"
|
||||||
|
#include "trie.h"
|
||||||
|
#include "fuzzy.h"
|
||||||
|
|
||||||
|
// Check that the trie `ct` reports exactly `size` entries, followed by a
// message carrying the actual size. `size` is parenthesized so that any
// expression can be passed safely.
#define TEST_SIZE(ct, size)            \
  TEST_CHECK(trie_size(ct) == (size)); \
  TEST_MSG("Size: %zu", trie_size(ct))
|
||||||
|
|
||||||
|
// Declare a local `Trie *ct`, initialize it and check that it was created.
// `ct` starts out as NULL so the check below never reads an indeterminate
// pointer if trie_init leaves it untouched.
#define TRIE_INIT()     \
  Trie *ct = NULL;      \
  trie_init(&ct, NULL); \
  TEST_CHECK(ct != NULL)
|
||||||
|
|
||||||
|
void test_init() {
|
||||||
|
TRIE_INIT();
|
||||||
|
TEST_SIZE(ct, 0);
|
||||||
|
trie_free(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_add_one() {
|
||||||
|
TRIE_INIT();
|
||||||
|
|
||||||
|
Entry *entry = entry_new(Redirect, "");
|
||||||
|
const char* string = "this is a test";
|
||||||
|
|
||||||
|
TEST_CHECK(trie_add(ct, string, entry) == Ok);
|
||||||
|
Entry *entry2;
|
||||||
|
TEST_CHECK(trie_search(ct, &entry2, string) == Ok);
|
||||||
|
TEST_CHECK(entry == entry2);
|
||||||
|
TEST_SIZE(ct, 1);
|
||||||
|
trie_free(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_add_prefix() {
|
||||||
|
TRIE_INIT();
|
||||||
|
|
||||||
|
const char *s1 = "halloween-2022";
|
||||||
|
const char *s2 = "halloween-202";
|
||||||
|
|
||||||
|
Entry *entry1 = entry_new(Redirect, "");
|
||||||
|
Entry *entry2 = entry_new(Redirect, "");
|
||||||
|
|
||||||
|
TEST_CHECK(trie_add(ct, s1, entry1) == Ok);
|
||||||
|
TEST_CHECK(trie_add(ct, s2, entry2) == Ok);
|
||||||
|
|
||||||
|
Entry *entry3;
|
||||||
|
|
||||||
|
TEST_CHECK(trie_search(ct, &entry3, s1) == Ok);
|
||||||
|
TEST_CHECK(entry3 == entry1);
|
||||||
|
entry2 = NULL;
|
||||||
|
|
||||||
|
TEST_CHECK(trie_search(ct, &entry3, s2) == Ok);
|
||||||
|
TEST_CHECK(entry3 == entry2);
|
||||||
|
|
||||||
|
trie_free(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_search_not_present() {
|
||||||
|
TRIE_INIT();
|
||||||
|
|
||||||
|
TEST_CHECK(trie_add(ct, "this string exists", NULL) == Ok);
|
||||||
|
Entry *entry;
|
||||||
|
TEST_CHECK(trie_search(ct, &entry, "this string does not exist") == NotFound);
|
||||||
|
|
||||||
|
trie_free(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_add_more() {
|
||||||
|
TRIE_INIT();
|
||||||
|
|
||||||
|
const char* one = "one";
|
||||||
|
const char* two = "two";
|
||||||
|
const char* twenty = "twenty";
|
||||||
|
const char* twentytwo = "twentytwo";
|
||||||
|
|
||||||
|
Entry *entry = entry_new(Redirect, "");
|
||||||
|
|
||||||
|
TEST_CHECK(trie_add(ct, one, entry) == Ok);
|
||||||
|
TEST_CHECK(trie_add(ct, two, entry) == Ok);
|
||||||
|
TEST_CHECK(trie_add(ct, twenty, entry) == Ok);
|
||||||
|
TEST_CHECK(trie_add(ct, twentytwo, entry) == Ok);
|
||||||
|
|
||||||
|
TEST_SIZE(ct, 4);
|
||||||
|
|
||||||
|
Entry *entry2;
|
||||||
|
TEST_CHECK(trie_search(ct, &entry2, one) == Ok);
|
||||||
|
TEST_CHECK(entry2 == entry);
|
||||||
|
entry2 = NULL;
|
||||||
|
|
||||||
|
TEST_CHECK(trie_search(ct, &entry2, two) == Ok);
|
||||||
|
TEST_CHECK(entry2 == entry);
|
||||||
|
entry2 = NULL;
|
||||||
|
|
||||||
|
TEST_CHECK(trie_search(ct, &entry2, twenty) == Ok);
|
||||||
|
TEST_CHECK(entry2 == entry);
|
||||||
|
entry2 = NULL;
|
||||||
|
|
||||||
|
TEST_CHECK(trie_search(ct, &entry2, twentytwo) == Ok);
|
||||||
|
TEST_CHECK(entry2 == entry);
|
||||||
|
entry2 = NULL;
|
||||||
|
|
||||||
|
TEST_CHECK(trie_add(ct, one, NULL) == AlreadyPresent);
|
||||||
|
TEST_CHECK(trie_add(ct, two, NULL) == AlreadyPresent);
|
||||||
|
TEST_CHECK(trie_add(ct, twenty, NULL) == AlreadyPresent);
|
||||||
|
TEST_CHECK(trie_add(ct, twentytwo, NULL) == AlreadyPresent);
|
||||||
|
|
||||||
|
trie_free(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* void test_remove_one() { */
|
||||||
|
/* Trie* ct = trie_init(); */
|
||||||
|
/* TEST_CHECK(ct != NULL); */
|
||||||
|
|
||||||
|
/* const char* string = "this is a test"; */
|
||||||
|
/* TEST_CHECK(trie_add(ct, string, NULL)); */
|
||||||
|
/* TEST_SIZE(ct, 1); */
|
||||||
|
|
||||||
|
/* TEST_CHECK(trie_remove(ct, string)); */
|
||||||
|
/* TEST_SIZE(ct, 0); */
|
||||||
|
|
||||||
|
/* trie_free(ct); */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
/* void test_remove_more() { */
|
||||||
|
/* Trie* ct = trie_init(); */
|
||||||
|
/* TEST_CHECK(ct != NULL); */
|
||||||
|
|
||||||
|
/* const char* one = "one"; */
|
||||||
|
/* const char* two = "two"; */
|
||||||
|
/* const char* twenty = "twenty"; */
|
||||||
|
/* const char* twentytwo = "twentytwo"; */
|
||||||
|
/* TEST_CHECK(trie_add(ct, one, NULL)); */
|
||||||
|
/* TEST_CHECK(trie_add(ct, two, NULL)); */
|
||||||
|
/* TEST_CHECK(trie_add(ct, twenty, NULL)); */
|
||||||
|
/* TEST_CHECK(trie_add(ct, twentytwo, NULL)); */
|
||||||
|
|
||||||
|
/* TEST_SIZE(ct, 4); */
|
||||||
|
|
||||||
|
/* TEST_CHECK(trie_remove(ct, one)); */
|
||||||
|
/* TEST_CHECK(trie_remove(ct, two)); */
|
||||||
|
/* TEST_CHECK(trie_remove(ct, twenty)); */
|
||||||
|
/* TEST_CHECK(trie_remove(ct, twentytwo)); */
|
||||||
|
|
||||||
|
/* TEST_SIZE(ct, 0); */
|
||||||
|
|
||||||
|
/* trie_free(ct); */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
/* void test_remove_not_present() { */
|
||||||
|
/* Trie* ct = trie_init(); */
|
||||||
|
/* TEST_CHECK(ct != NULL); */
|
||||||
|
|
||||||
|
/* TEST_CHECK(trie_add(ct, "this string exists", NULL)); */
|
||||||
|
/* TEST_CHECK(!trie_remove(ct, "this string does not exist")); */
|
||||||
|
|
||||||
|
/* trie_free(ct); */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
// Test seeds that are known to fail so we don't get regressions
|
||||||
|
void test_fuzzy_set() {
|
||||||
|
FuzzyConfig configs[] = {
|
||||||
|
{ 403318210, 5, 500},
|
||||||
|
{ 588218406, 16, 460},
|
||||||
|
{ 297512224, 21, 500},
|
||||||
|
{ 403318210, 5, 500}
|
||||||
|
};
|
||||||
|
|
||||||
|
int count = sizeof(configs) / sizeof(FuzzyConfig);
|
||||||
|
int res;
|
||||||
|
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
res = fuzzy_test_trie_seed(configs[i]);
|
||||||
|
TEST_CHECK_(res == 0,
|
||||||
|
"Failed config, seed = %i, len = %i, count = %i, code=%i", configs[i].seed, configs[i].word_length, configs[i].word_count, res);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_LIST = {
|
||||||
|
{"trie init",test_init },
|
||||||
|
{ "trie add one",test_add_one },
|
||||||
|
{ "trie add more",test_add_more },
|
||||||
|
{ "trie search not present",test_search_not_present},
|
||||||
|
|
||||||
|
/* { "trie remove one",test_remove_one }, */
|
||||||
|
/* { "trie remove more",test_remove_more }, */
|
||||||
|
/* { "trie remove not present",test_remove_not_present}, */
|
||||||
|
{ "trie fuzzy set", test_fuzzy_set },
|
||||||
|
{ NULL, NULL}
|
||||||
|
};
|
||||||
|
|
@ -0,0 +1,34 @@
|
||||||
|
#include "test.h"
|
||||||
|
#include "trie.h"
|
||||||
|
#include "fuzzy.h"
|
||||||
|
|
||||||
|
void test_fuzzy() {
|
||||||
|
// Randomize seed
|
||||||
|
srand(time(NULL));
|
||||||
|
|
||||||
|
FuzzyConfig config;
|
||||||
|
int counter = 0;
|
||||||
|
int res;
|
||||||
|
|
||||||
|
for (int len = 1; len < 25; len += 5) {
|
||||||
|
for (int count = 10; count <= 500; count += 10) {
|
||||||
|
for (int i = 0; i < 50; i++) {
|
||||||
|
counter++;
|
||||||
|
|
||||||
|
config.seed = rand();
|
||||||
|
config.word_length = len;
|
||||||
|
config.word_count = count;
|
||||||
|
|
||||||
|
res = fuzzy_test_trie_seed(config);
|
||||||
|
TEST_CHECK_(res == 0,
|
||||||
|
"Failed config, seed = %i, len = %i, count = %i, code = %i", config.seed, config.word_length, config.word_count, res);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TEST_MSG("fuzzy tests done = %i", counter);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_LIST = {
|
||||||
|
{ "customtrie fuzzy", test_fuzzy },
|
||||||
|
{ NULL, NULL}
|
||||||
|
};
|
||||||
Loading…
Reference in New Issue