#include #include #include #include #include #include "trie.h" #include "trie_entry.h" #include "trie_node.h" typedef struct ttrie { TrieNode *root; size_t size; char *file_path; pthread_rwlock_t lock; } Trie; TrieExitCode trie_add_no_lock(Trie *trie, const char *key, Entry *entry); /** * Allocate and initialize an empty Trie * * @return pointer to the empty Trie */ TrieExitCode trie_init(Trie **trie_ptr, const char *file_path) { // Allocate & initialize trie Trie *trie = calloc(1, sizeof(Trie)); trie->root = tnode_init(); pthread_rwlock_init(&trie->lock, NULL); if (file_path == NULL) { trie->file_path = NULL; *trie_ptr = trie; return Ok; } trie->file_path = strdup(file_path); // Populate trie with data from file FILE *fp = fopen(file_path, "r"); if (fp == NULL) { return FileError; } // We read in lines of at most 8192 characters (sounds like enough) char buffer[8192]; EntryType type; Entry *entry; int i, j; TrieExitCode status; while (fgets(buffer, 8192, fp)) { i = 0; // Move index in buffer until we encounter first space character while (buffer[i] != ' ') { i++; } // Split the buffer into two strings, the key and the payload buffer[i] = '\0'; type = entry_type_from_char(buffer[i + 1]); // Skip type character & its surrounding spaces j = i + 3; // Now remove the newline character while (buffer[j] != '\n') { j++; } buffer[j] = '\0'; entry = entry_new(type, buffer + i + 3); status = trie_add_no_lock(trie, buffer, entry); if (status != Ok) { trie_free(trie); return status; } } fclose(fp); *trie_ptr = trie; return Ok; } /** * De-allocate a TernaryTree by freeing its entire underlying structure. * * @param trie trie to free */ void trie_free(Trie *trie) { tnode_free(trie->root); free(trie); } typedef struct searchresult { TrieNode *parent; TrieNode *child; } SearchResult; SearchResult trie_search_node_len(Trie *trie, const char *key, size_t key_len) { SearchResult out = {NULL, NULL}; size_t i = 0; TrieNode **node_ptr = &(trie->root); TrieNode **child_ptr; do { child_ptr = tnode_search(*node_ptr, key[i], false); // We don't have to check whether *node_ptr is NULL, because if it was // NULL, it wouldn't be in the binary tree. if (child_ptr == NULL) { return out; } i++; if (memcmp((*child_ptr)->string, key + i, (*child_ptr)->string_len) != 0) { return out; } i += (*child_ptr)->string_len; if (i < key_len) { node_ptr = child_ptr; } } while (i < key_len); // At this point, we've either arrived at an empty child, or traversed through // the entire string. Therefore, all we have to do is check whether we're at // the end of the string and if node represents a string. if (i == key_len && (*child_ptr)->represents) { out.parent = *node_ptr; out.child = *child_ptr; } return out; } SearchResult trie_search_node(Trie *trie, const char *key) { return trie_search_node_len(trie, key, strlen(key)); } /** * Returns whether the given string is present in the trie. * * @param trie trie to look in * @param string string to look up * @return true if the string is present in the trie, false otherwise */ TrieExitCode trie_search_len(Trie *trie, Entry **entry_ptr, const char *key, size_t key_len) { SearchResult res = trie_search_node_len(trie, key, key_len); if (res.child == NULL) { return NotFound; } *entry_ptr = res.child->entry; return Ok; } TrieExitCode trie_search(Trie *trie, Entry **entry_ptr, const char *key) { return trie_search_len(trie, entry_ptr, key, strlen(key)); } /** * Add the given string to the Trie. * * @param trie trie to add string to * @param string string to add * @return true if the string wasn't present in the trie and thus added, false * otherwise */ TrieExitCode trie_add_len_no_lock(Trie *trie, const char *key, size_t key_len, Entry *entry) { size_t i = 0; uint8_t offset; TrieNode **node_ptr = &(trie->root); TrieNode **child_node_ptr; TrieNode *child_node; do { offset = 0; child_node_ptr = tnode_search(*node_ptr, key[i], true); i++; // We've reached a NULL child, so we add the remaining part of the string // here if (*child_node_ptr == NULL) { child_node = tnode_init(); while (offset < TRIE_MAX_SKIP_SIZE && i + offset < key_len) { offset++; } memcpy(child_node->string, key + i, offset); child_node->string_len = offset; *child_node_ptr = child_node; // If the remaining part of the string is still longer than the maximum // allowed skip length, we continue through the loop. The next iteration // will enter this if statement again, and perform the same loop, until // the string is fully added to the trie. if (i + offset < key_len) { node_ptr = child_node_ptr; i += offset; continue; } child_node->represents = true; child_node->entry = entry; trie->size++; return Ok; } while (offset < (*child_node_ptr)->string_len) { // String no longer aligns with edge, so we have to split if (key[i + offset] != (*child_node_ptr)->string[offset]) { TrieNode *split_node = tnode_init(); child_node = *child_node_ptr; // New string of the split node is the prefix that we were able // to skip if (offset > 0) { memcpy(split_node->string, child_node->string, offset); split_node->string_len = offset; } // split_node replaces child_node as the child of node *child_node_ptr = split_node; TrieNode **new_node_ptr = tnode_search(split_node, child_node->string[offset], true); *new_node_ptr = child_node; // child_node has now become a child of split_node, so we update its // string accordingely by removing the skipped prefix + the one // character that's already stored by being a child of split_node /* char *old_string = child_node->string.ptr; */ uint8_t new_skip_len = child_node->string_len - (offset + 1); if (new_skip_len > 0) { char old_string[TRIE_MAX_SKIP_SIZE]; memcpy(old_string, child_node->string + offset + 1, new_skip_len); memcpy(child_node->string, old_string, new_skip_len); } child_node->string_len = new_skip_len; // The while loop will exit either way after this has happened, as // child_node is now split_node and split_node's len is already set to // offset. break; } offset++; } node_ptr = child_node_ptr; i += offset; } while (i < key_len); if ((*child_node_ptr)->represents) { return AlreadyPresent; } (*child_node_ptr)->represents = true; (*child_node_ptr)->entry = entry; trie->size++; return Ok; } TrieExitCode trie_add_no_lock(Trie *trie, const char *key, Entry *entry) { return trie_add_len_no_lock(trie, key, strlen(key), entry); } TrieExitCode trie_add_len(Trie *trie, const char *key, size_t key_len, Entry *entry) { if (trie->file_path != NULL) { // Easiest way to make sure we don't add duplicate entries // We use an internal function that doesn't require a read lock, as we're // already inside a write lock if (trie_search_node_len(trie, key, key_len).child != NULL) { return AlreadyPresent; } FILE *fp = fopen(trie->file_path, "a"); if (fp == NULL) { return FileError; } fputs(key, fp); fputs(" ", fp); fputc(entry_type_to_char(entry->type), fp); fputs(" ", fp); fputs(entry->string, fp); fputs("\n", fp); fclose(fp); } // This function *should* always return Ok. Otherwise, the function would've // exited because the string was found in the trie. return trie_add_len_no_lock(trie, key, key_len, entry); } TrieExitCode trie_add(Trie *trie, const char *key, Entry *entry) { return trie_add_len(trie, key, strlen(key), entry); } TrieExitCode trie_add_random(Trie *trie, char **key_ptr, Entry *entry, bool secure) { // Generate random key bool ok = false; int key_length = secure ? RANDOM_KEY_LENGTH_LONG : RANDOM_KEY_LENGTH_SHORT; char *key = malloc(key_length + 1); key[key_length] = '\0'; // We naively generate new keys until we find a key that isn't in the trie // yet. With charset_len ** RANDOM_KEY_LENGTH sufficiently large, this isn't a // problem, because the chances of collisions are extremely small. while (!ok) { for (int i = 0; i < key_length; i++) { key[i] = charset[rand() % charset_len]; } ok = trie_search_node(trie, key).child == NULL; } TrieExitCode return_value = trie_add(trie, key, entry); if (return_value == Ok) { *key_ptr = key; } else { free(key); } return return_value; } /** * Remove the given string from a Trie. * * @param trie trie to remove string from * @param string string to remove * @return true if the string was in the trie and thus removed, false otherwise */ /* bool trie_remove(Trie *trie, const char *string) { */ /* pthread_rwlock_wrlock(&trie->lock); */ /* bool return_value = false; */ /* SearchResult res = trie_search_node(trie, string); */ /* if (res.child == NULL) { */ /* goto end; */ /* } */ /* trie->size--; */ /* return_value = true; */ /* if (res.parent != NULL) { */ /* // We're removing a full leaf, so we calculate the offset of the * character */ /* // to remove from the parent */ /* if (res.child->type == 2) { */ /* size_t str_len = strlen(string); */ /* size_t suffix_len = strlen(res.child->ptr.string); */ /* tnode_remove(res.parent, string[str_len - suffix_len - 1]); */ /* } */ /* // In the other case, the character to remove from the parent is the last */ /* // character of the string */ /* else if (res.child->size == 0) { */ /* size_t i = 0; */ /* while (string[i + 1] != DELIMITER) { */ /* i++; */ /* } */ /* tnode_remove(res.parent, string[i]); */ /* } else { */ /* res.child->type = 0; */ /* goto end; */ /* } */ /* tnode_free(res.child); */ /* } */ /* // We're in the root here */ /* else { */ /* res.child->type = 0; */ /* } */ /* end: */ /* pthread_rwlock_unlock(&trie->lock); */ /* return return_value; */ /* } */ /** * Return the current size of the given trie. * * @param trie trie to return size for * @return size of the trie */ size_t trie_size(Trie *trie) { return trie->size; } int trie_rlock(Trie *trie) { return pthread_rwlock_rdlock(&trie->lock); } int trie_wlock(Trie *trie) { return pthread_rwlock_wrlock(&trie->lock); } int trie_unlock(Trie *trie) { return pthread_rwlock_unlock(&trie->lock); }