chore: ran formatter; added TRIE description file

trie-skips
Jef Roosens 2022-11-29 15:17:43 +01:00
parent 1e5442379f
commit 2d7cce138d
Signed by: Jef Roosens
GPG Key ID: B75D4F293C7052DB
4 changed files with 38 additions and 19 deletions

16
TRIE.md 100644
View File

@ -0,0 +1,16 @@
# Trie design
The underlying data structure is based on a combination of a ternary and a
Patricia trie.
* Nodes are classic ternary trie nodes, meaning each node contains a binary
search tree.
* Each node can define a skip, like a Patricia trie, of at most 8 characters.
These skipped characters are stored directly in the structs defining the
nodes.
* While the add function relies on the fact that the input is a null-terminated
C string, the trie itself does not store any null bytes.
The goal of this data structure is to be as optimized as possible for search
operations with short (usually < 8 characters) keys, as this is by far the most
common operation for a URL shortener/pastebin.

View File

@ -31,8 +31,8 @@ static const std::string index_page = R"(
return crow::response(crow::status::UNAUTHORIZED); \ return crow::response(crow::status::UNAUTHORIZED); \
} }
crow::response add_redirect(std::string base_url, Trie *trie, crow::response add_redirect(std::string base_url, Trie *trie, const char *url,
const char *url, bool secure) { bool secure) {
Entry *new_entry = entry_new(Redirect, url); Entry *new_entry = entry_new(Redirect, url);
char *key = trie_add_random(trie, new_entry, secure); char *key = trie_add_random(trie, new_entry, secure);
@ -61,8 +61,8 @@ bool store_paste(const char *key, const char *body) {
return true; return true;
} }
crow::response add_paste(std::string base_url, Trie *trie, crow::response add_paste(std::string base_url, Trie *trie, const char *body,
const char *body, bool secure) { bool secure) {
Entry *new_entry = entry_new(Paste, ""); Entry *new_entry = entry_new(Paste, "");
char *key = trie_add_random(trie, new_entry, secure); char *key = trie_add_random(trie, new_entry, secure);

View File

@ -218,8 +218,7 @@ Entry *trie_search(Trie *trie, const char *key) {
* @return true if the string wasn't present in the trie and thus added, false * @return true if the string wasn't present in the trie and thus added, false
* otherwise * otherwise
*/ */
bool trie_add_no_lock(Trie *trie, const char *string, bool trie_add_no_lock(Trie *trie, const char *string, Entry *entry) {
Entry *entry) {
size_t i = 0; size_t i = 0;
uint8_t offset; uint8_t offset;
TrieNode **node_ptr = &(trie->root); TrieNode **node_ptr = &(trie->root);
@ -230,11 +229,13 @@ bool trie_add_no_lock(Trie *trie, const char *string,
offset = 0; offset = 0;
child_node_ptr = tnode_search(*node_ptr, string[i], true); child_node_ptr = tnode_search(*node_ptr, string[i], true);
// We've reached a NULL child, so we add the remaining part of the string here // We've reached a NULL child, so we add the remaining part of the string
// here
if (*child_node_ptr == NULL) { if (*child_node_ptr == NULL) {
child_node = tnode_init(); child_node = tnode_init();
while (offset < TRIE_MAX_SKIP_SIZE && string[i + 1 + offset] != DELIMITER) { while (offset < TRIE_MAX_SKIP_SIZE &&
string[i + 1 + offset] != DELIMITER) {
child_node->string[offset] = string[i + 1 + offset]; child_node->string[offset] = string[i + 1 + offset];
offset++; offset++;
} }
@ -277,7 +278,8 @@ bool trie_add_no_lock(Trie *trie, const char *string,
// split_node replaces child_node as the child of node // split_node replaces child_node as the child of node
*child_node_ptr = split_node; *child_node_ptr = split_node;
TrieNode **new_node_ptr = tnode_search(split_node, child_node->string[offset], true); TrieNode **new_node_ptr =
tnode_search(split_node, child_node->string[offset], true);
*new_node_ptr = child_node; *new_node_ptr = child_node;
// child_node has now become a child of split_node, so we update its // child_node has now become a child of split_node, so we update its
@ -317,8 +319,7 @@ bool trie_add_no_lock(Trie *trie, const char *string,
return true; return true;
} }
bool trie_add_persistent(Trie *trie, const char *key, bool trie_add_persistent(Trie *trie, const char *key, Entry *entry) {
Entry *entry) {
bool return_value = false; bool return_value = false;
if (trie->file_path != NULL) { if (trie->file_path != NULL) {
@ -417,7 +418,8 @@ char *trie_add_random(Trie *trie, Entry *entry, bool secure) {
/* return_value = true; */ /* return_value = true; */
/* if (res.parent != NULL) { */ /* if (res.parent != NULL) { */
/* // We're removing a full leaf, so we calculate the offset of the character */ /* // We're removing a full leaf, so we calculate the offset of the
* character */
/* // to remove from the parent */ /* // to remove from the parent */
/* if (res.child->type == 2) { */ /* if (res.child->type == 2) { */
/* size_t str_len = strlen(string); */ /* size_t str_len = strlen(string); */
@ -425,7 +427,8 @@ char *trie_add_random(Trie *trie, Entry *entry, bool secure) {
/* tnode_remove(res.parent, string[str_len - suffix_len - 1]); */ /* tnode_remove(res.parent, string[str_len - suffix_len - 1]); */
/* } */ /* } */
/* // In the other case, the character to remove from the parent is the last */ /* // In the other case, the character to remove from the parent is the last
*/
/* // character of the string */ /* // character of the string */
/* else if (res.child->size == 0) { */ /* else if (res.child->size == 0) { */
/* size_t i = 0; */ /* size_t i = 0; */

View File

@ -130,8 +130,7 @@ void tnode_free(TrieNode *node) {
* node represents a leaf with a string, because the struct and therefore the * node represents a leaf with a string, because the struct and therefore the
* address is created if it doesn't exist yet. * address is created if it doesn't exist yet.
*/ */
TrieNode **tnode_search(TrieNode *node, const char c, TrieNode **tnode_search(TrieNode *node, const char c, bool create) {
bool create) {
// It can happen that the node has no initialized root yet // It can happen that the node has no initialized root yet
if (node->tree_size == 0) { if (node->tree_size == 0) {
if (create) { if (create) {
@ -197,7 +196,8 @@ TrieNode **tnode_search(TrieNode *node, const char c,
/* TrieNode *new_node = tnode_init(); */ /* TrieNode *new_node = tnode_init(); */
/* char key = node->ptr.string[0]; */ /* char key = node->ptr.string[0]; */
/* // There's a chance the remaining string was only 1 character, meaning the new */ /* // There's a chance the remaining string was only 1 character, meaning the
* new */
/* // node doesn't have to store a string */ /* // node doesn't have to store a string */
/* if (node->ptr.string[1] != DELIMITER) { */ /* if (node->ptr.string[1] != DELIMITER) { */
/* tnode_set_string(new_node, node->ptr.string + 1); */ /* tnode_set_string(new_node, node->ptr.string + 1); */