chore: ran formatter; added TRIE description file
parent
1e5442379f
commit
2d7cce138d
|
@ -0,0 +1,16 @@
|
||||||
|
# Trie design
|
||||||
|
|
||||||
|
The underlying data structure is based on a combination of a ternary and a
|
||||||
|
Patricia trie.
|
||||||
|
|
||||||
|
* Nodes are classic ternary trie nodes, meaning each node contains a binary
|
||||||
|
search tree of its children, keyed by character.
|
||||||
|
* Each node can define a skip, like a Patricia trie, of at most 8 characters.
|
||||||
|
These skipped characters are stored directly in the structs defining the
|
||||||
|
nodes.
|
||||||
|
* While the add function relies on the fact that the input is a null-terminated
|
||||||
|
C string, the trie itself does not store any null bytes.
|
||||||
|
|
||||||
|
The goal of this data structure is to be as optimized as possible for search
|
||||||
|
operations with short (usually < 8 characters) keys, as this is by far the most
|
||||||
|
common operation for a URL shortener/pastebin.
|
|
@ -31,8 +31,8 @@ static const std::string index_page = R"(
|
||||||
return crow::response(crow::status::UNAUTHORIZED); \
|
return crow::response(crow::status::UNAUTHORIZED); \
|
||||||
}
|
}
|
||||||
|
|
||||||
crow::response add_redirect(std::string base_url, Trie *trie,
|
crow::response add_redirect(std::string base_url, Trie *trie, const char *url,
|
||||||
const char *url, bool secure) {
|
bool secure) {
|
||||||
Entry *new_entry = entry_new(Redirect, url);
|
Entry *new_entry = entry_new(Redirect, url);
|
||||||
char *key = trie_add_random(trie, new_entry, secure);
|
char *key = trie_add_random(trie, new_entry, secure);
|
||||||
|
|
||||||
|
@ -61,8 +61,8 @@ bool store_paste(const char *key, const char *body) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
crow::response add_paste(std::string base_url, Trie *trie,
|
crow::response add_paste(std::string base_url, Trie *trie, const char *body,
|
||||||
const char *body, bool secure) {
|
bool secure) {
|
||||||
Entry *new_entry = entry_new(Paste, "");
|
Entry *new_entry = entry_new(Paste, "");
|
||||||
char *key = trie_add_random(trie, new_entry, secure);
|
char *key = trie_add_random(trie, new_entry, secure);
|
||||||
|
|
||||||
|
|
21
src/trie.c
21
src/trie.c
|
@ -218,8 +218,7 @@ Entry *trie_search(Trie *trie, const char *key) {
|
||||||
* @return true if the string wasn't present in the trie and thus added, false
|
* @return true if the string wasn't present in the trie and thus added, false
|
||||||
* otherwise
|
* otherwise
|
||||||
*/
|
*/
|
||||||
bool trie_add_no_lock(Trie *trie, const char *string,
|
bool trie_add_no_lock(Trie *trie, const char *string, Entry *entry) {
|
||||||
Entry *entry) {
|
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
uint8_t offset;
|
uint8_t offset;
|
||||||
TrieNode **node_ptr = &(trie->root);
|
TrieNode **node_ptr = &(trie->root);
|
||||||
|
@ -230,11 +229,13 @@ bool trie_add_no_lock(Trie *trie, const char *string,
|
||||||
offset = 0;
|
offset = 0;
|
||||||
child_node_ptr = tnode_search(*node_ptr, string[i], true);
|
child_node_ptr = tnode_search(*node_ptr, string[i], true);
|
||||||
|
|
||||||
// We've reached a NULL child, so we add the remaining part of the string here
|
// We've reached a NULL child, so we add the remaining part of the string
|
||||||
|
// here
|
||||||
if (*child_node_ptr == NULL) {
|
if (*child_node_ptr == NULL) {
|
||||||
child_node = tnode_init();
|
child_node = tnode_init();
|
||||||
|
|
||||||
while (offset < TRIE_MAX_SKIP_SIZE && string[i + 1 + offset] != DELIMITER) {
|
while (offset < TRIE_MAX_SKIP_SIZE &&
|
||||||
|
string[i + 1 + offset] != DELIMITER) {
|
||||||
child_node->string[offset] = string[i + 1 + offset];
|
child_node->string[offset] = string[i + 1 + offset];
|
||||||
offset++;
|
offset++;
|
||||||
}
|
}
|
||||||
|
@ -277,7 +278,8 @@ bool trie_add_no_lock(Trie *trie, const char *string,
|
||||||
|
|
||||||
// split_node replaces child_node as the child of node
|
// split_node replaces child_node as the child of node
|
||||||
*child_node_ptr = split_node;
|
*child_node_ptr = split_node;
|
||||||
TrieNode **new_node_ptr = tnode_search(split_node, child_node->string[offset], true);
|
TrieNode **new_node_ptr =
|
||||||
|
tnode_search(split_node, child_node->string[offset], true);
|
||||||
*new_node_ptr = child_node;
|
*new_node_ptr = child_node;
|
||||||
|
|
||||||
// child_node has now become a child of split_node, so we update its
|
// child_node has now become a child of split_node, so we update its
|
||||||
|
@ -317,8 +319,7 @@ bool trie_add_no_lock(Trie *trie, const char *string,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool trie_add_persistent(Trie *trie, const char *key,
|
bool trie_add_persistent(Trie *trie, const char *key, Entry *entry) {
|
||||||
Entry *entry) {
|
|
||||||
bool return_value = false;
|
bool return_value = false;
|
||||||
|
|
||||||
if (trie->file_path != NULL) {
|
if (trie->file_path != NULL) {
|
||||||
|
@ -417,7 +418,8 @@ char *trie_add_random(Trie *trie, Entry *entry, bool secure) {
|
||||||
/* return_value = true; */
|
/* return_value = true; */
|
||||||
|
|
||||||
/* if (res.parent != NULL) { */
|
/* if (res.parent != NULL) { */
|
||||||
/* // We're removing a full leaf, so we calculate the offset of the character */
|
/* // We're removing a full leaf, so we calculate the offset of the
|
||||||
|
* character */
|
||||||
/* // to remove from the parent */
|
/* // to remove from the parent */
|
||||||
/* if (res.child->type == 2) { */
|
/* if (res.child->type == 2) { */
|
||||||
/* size_t str_len = strlen(string); */
|
/* size_t str_len = strlen(string); */
|
||||||
|
@ -425,7 +427,8 @@ char *trie_add_random(Trie *trie, Entry *entry, bool secure) {
|
||||||
|
|
||||||
/* tnode_remove(res.parent, string[str_len - suffix_len - 1]); */
|
/* tnode_remove(res.parent, string[str_len - suffix_len - 1]); */
|
||||||
/* } */
|
/* } */
|
||||||
/* // In the other case, the character to remove from the parent is the last */
|
/* // In the other case, the character to remove from the parent is the last
|
||||||
|
*/
|
||||||
/* // character of the string */
|
/* // character of the string */
|
||||||
/* else if (res.child->size == 0) { */
|
/* else if (res.child->size == 0) { */
|
||||||
/* size_t i = 0; */
|
/* size_t i = 0; */
|
||||||
|
|
|
@ -29,12 +29,12 @@ typedef struct tinode {
|
||||||
typedef struct tnode {
|
typedef struct tnode {
|
||||||
Entry *entry;
|
Entry *entry;
|
||||||
|
|
||||||
TrieInnerNode* tree;
|
TrieInnerNode *tree;
|
||||||
uint8_t tree_size;
|
uint8_t tree_size;
|
||||||
|
|
||||||
// Skips are at most 8 characters, and are stored in the nodes
|
// Skips are at most 8 characters, and are stored in the nodes
|
||||||
char string[TRIE_MAX_SKIP_SIZE];
|
char string[TRIE_MAX_SKIP_SIZE];
|
||||||
uint8_t string_len: 4;
|
uint8_t string_len : 4;
|
||||||
|
|
||||||
bool represents : 1;
|
bool represents : 1;
|
||||||
} TrieNode;
|
} TrieNode;
|
||||||
|
@ -130,8 +130,7 @@ void tnode_free(TrieNode *node) {
|
||||||
* node represents a leaf with a string, because the struct and therefore the
|
* node represents a leaf with a string, because the struct and therefore the
|
||||||
* address is created if it doesn't exist yet.
|
* address is created if it doesn't exist yet.
|
||||||
*/
|
*/
|
||||||
TrieNode **tnode_search(TrieNode *node, const char c,
|
TrieNode **tnode_search(TrieNode *node, const char c, bool create) {
|
||||||
bool create) {
|
|
||||||
// It can happen that the node has no initialized root yet
|
// It can happen that the node has no initialized root yet
|
||||||
if (node->tree_size == 0) {
|
if (node->tree_size == 0) {
|
||||||
if (create) {
|
if (create) {
|
||||||
|
@ -197,7 +196,8 @@ TrieNode **tnode_search(TrieNode *node, const char c,
|
||||||
/* TrieNode *new_node = tnode_init(); */
|
/* TrieNode *new_node = tnode_init(); */
|
||||||
/* char key = node->ptr.string[0]; */
|
/* char key = node->ptr.string[0]; */
|
||||||
|
|
||||||
/* // There's a chance the remaining string was only 1 character, meaning the new */
|
/* // There's a chance the remaining string was only 1 character, meaning the
|
||||||
|
* new */
|
||||||
/* // node doesn't have to store a string */
|
/* // node doesn't have to store a string */
|
||||||
/* if (node->ptr.string[1] != DELIMITER) { */
|
/* if (node->ptr.string[1] != DELIMITER) { */
|
||||||
/* tnode_set_string(new_node, node->ptr.string + 1); */
|
/* tnode_set_string(new_node, node->ptr.string + 1); */
|
||||||
|
|
Loading…
Reference in New Issue