#include #include #include #include #include #include "ternarytrie.h" #include "ternarytrie_node.c" typedef struct ttrie { TernaryTrieNode *root; size_t size; char* file_path; pthread_rwlock_t lock; } TernaryTrie; /** * Allocate and initialize an empty TernaryTrie * * @return pointer to the empty TernaryTrie */ TernaryTrie *ternarytrie_init() { TernaryTrie *trie = calloc(1, sizeof(TernaryTrie)); trie->root = ttnode_init(); pthread_rwlock_init(&trie->lock, NULL); return trie; } /** * De-allocate a TernaryTree by freeing its entire underlying structure. * * @param trie trie to free */ void ternarytrie_free(TernaryTrie *trie) { ttnode_free(trie->root); free(trie); } bool ternarytrie_add_internal(TernaryTrie *trie, const char *string, const char *payload); void ternarytrie_populate(TernaryTrie *trie, const char *file_path) { trie->file_path = my_strdup(file_path); FILE* fp = fopen(file_path, "r"); // TODO properly handle this if (fp == NULL) { return; } // We read in lines of at most 8192 characters (sounds like enough) char buffer[8192]; int i, j; while (fgets(buffer, 8192, fp)) { printf("%s", buffer); // Find index of space character i = 0; while (buffer[i] != ' ') { i++; } // Split the buffer into two strings, the key and the payload buffer[i] = '\0'; j = i + 1; // Now remove the newline character while (buffer[j] != '\n') { j++; } buffer[j] = '\0'; ternarytrie_add_internal(trie, buffer, buffer + i + 1); } fclose(fp); } typedef struct searchresult { TernaryTrieNode *parent; TernaryTrieNode *child; } SearchResult; SearchResult ternarytrie_search_node(TernaryTrie *trie, const char *string) { SearchResult out = {NULL, NULL}; // Edge case for empty string if (string[0] == DELIMITER) { if (trie->root->type == 1) { out.child = trie->root; } return out; } size_t i = 0; TernaryTrieNode **node_ptr = &(trie->root); TernaryTrieNode **child_ptr; do { child_ptr = ttnode_search(*node_ptr, string[i], false); // We don't have to check whether *node_ptr is NULL, because if it was // NULL, it wouldn't be in the binary tree. if (child_ptr == NULL || *child_ptr == NULL) { return out; } i++; if (string[i] == DELIMITER || (*child_ptr)->type == 2) { break; } node_ptr = child_ptr; } while (1); if ((*child_ptr)->type == 2) { if (string[i] != DELIMITER && strcmp(string + i, (*child_ptr)->ptr.string) == 0) { out.child = *child_ptr; out.parent = *node_ptr; } } // Here we know we've traversed through the entire string and have arrived at // a node that isn't a full leaf else if ((*child_ptr)->type == 1) { out.child = *child_ptr; out.parent = *node_ptr; } return out; } /** * Returns whether the given string is present in the trie. * * @param trie trie to look in * @param string string to look up * @return true if the string is present in the trie, false otherwise */ char * ternarytrie_search(TernaryTrie *trie, const char *string) { pthread_rwlock_rdlock(&trie->lock); SearchResult res = ternarytrie_search_node(trie, string); char* return_value = NULL; if (res.child != NULL) { return_value = res.child->payload; } pthread_rwlock_unlock(&trie->lock); return return_value; } /** * Add the given string to the TernaryTrie. * * @param trie trie to add string to * @param string string to add * @return true if the string wasn't present in the trie and thus added, false * otherwise */ bool ternarytrie_add_internal(TernaryTrie *trie, const char *string, const char *payload) { // Edge case for empty string if (string[0] == DELIMITER) { if (trie->root->type == 0) { trie->root->type = 1; trie->root->payload = my_strdup(payload); trie->size++; return true; } return false; } size_t i = 0; TernaryTrieNode **node_ptr = &(trie->root); TernaryTrieNode **new_node_ptr; do { new_node_ptr = ttnode_search(*node_ptr, string[i], true); // ttnode_search will only return NULL with create true if the node to look // in represents a full leaf. Therefore, we split the node and restart the // iteration. if (new_node_ptr == NULL) { // It's possible we've ended up in the full leaf node that represents this // string if (strcmp(string + i, (*node_ptr)->ptr.string) == 0) { return false; } ttnode_split(*node_ptr); continue; } node_ptr = new_node_ptr; // The search function has added the character to the node i++; // The next node in the string's path doesn't exist yet, so we add it to the // trie if (*node_ptr == NULL) { TernaryTrieNode *new_node = ttnode_init(); // If there's a remaining part of the string, we add it to the leaf if (string[i] != DELIMITER) { ttnode_set_string(new_node, string + i); } else { new_node->type = 1; } new_node->payload = my_strdup(payload); *node_ptr = new_node; trie->size++; return true; } } while (string[i] != DELIMITER); // If we've arrived here, we've traversed through the entire string and have // arrived at a node that already exists. // The existing node is a full leaf, so we split it and make it // represent our new string. if ((*node_ptr)->type == 2) { ttnode_split(*node_ptr); } // The string is already in the trie else if ((*node_ptr)->type == 1) { return false; } (*node_ptr)->type = 1; (*node_ptr)->payload = my_strdup(payload); trie->size++; return true; } bool ternarytrie_add_persistent(TernaryTrie *trie, const char *string, const char *payload) { bool return_value = false; if (trie->file_path != NULL) { // Easiest way to make sure we don't add duplicate entries // We use an internal function that doesn't require a read lock, as we're // already inside a write lock if (ternarytrie_search_node(trie, string).child != NULL) { return false; } FILE *fp = fopen(trie->file_path, "a"); if (fp == NULL) { return false; } fputs(string, fp); fputs(" ", fp); fputs(payload, fp); fputs("\n", fp); fclose(fp); } // This function *should* always return true. Otherwise, the function would've // exited because the string was found in the trie. return ternarytrie_add_internal(trie, string, payload); } bool ternarytrie_add(TernaryTrie *trie, const char *string, const char *payload) { pthread_rwlock_wrlock(&trie->lock); bool return_value = ternarytrie_add_persistent(trie, string, payload); pthread_rwlock_unlock(&trie->lock); return return_value; } char* ternarytrie_add_random(TernaryTrie *trie, const char *payload) { pthread_rwlock_wrlock(&trie->lock); // Generate random key bool ok = false; char *key = malloc(RANDOM_KEY_LENGTH + 1); key[RANDOM_KEY_LENGTH] = '\0'; // We naively generate new keys until we find a key that isn't in the trie // yet. With charset_len ** RANDOM_KEY_LENGTH sufficiently large, this isn't a // problem, because the chances of collisions are extremely small. while (!ok) { for (int i = 0; i < RANDOM_KEY_LENGTH; i++) { key[i] = charset[rand() % charset_len]; } ok = ternarytrie_search_node(trie, key).child == NULL; } bool res = ternarytrie_add_persistent(trie, key, payload); char *return_value; if (res) { return_value = key; } else { return_value = NULL; free(key); } pthread_rwlock_unlock(&trie->lock); return return_value; } /** * Remove the given string from a TernaryTrie. * * @param trie trie to remove string from * @param string string to remove * @return true if the string was in the trie and thus removed, false otherwise */ bool ternarytrie_remove(TernaryTrie *trie, const char *string) { pthread_rwlock_wrlock(&trie->lock); bool return_value = false; SearchResult res = ternarytrie_search_node(trie, string); if (res.child == NULL) { goto end; } trie->size--; return_value = true; if (res.parent != NULL) { // We're removing a full leaf, so we calculate the offset of the character // to remove from the parent if (res.child->type == 2) { size_t str_len = strlen(string); size_t suffix_len = strlen(res.child->ptr.string); ttnode_remove(res.parent, string[str_len - suffix_len - 1]); } // In the other case, the character to remove from the parent is the last // character of the string else if (res.child->size == 0) { size_t i = 0; while (string[i + 1] != DELIMITER) { i++; } ttnode_remove(res.parent, string[i]); } else { res.child->type = 0; goto end; } ttnode_free(res.child); } // We're in the root here else { res.child->type = 0; } end: pthread_rwlock_unlock(&trie->lock); return return_value; } /** * Return the current size of the given trie. * * @param trie trie to return size for * @return size of the trie */ size_t ternarytrie_size(TernaryTrie *trie) { return trie->size; }