lander/tries/src/ternarytrie.c

394 lines
9.1 KiB
C

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <pthread.h>
#include "ternarytrie.h"
#include "ternarytrie_node.c"
/**
* A ternary trie mapping string keys to string payloads, together with the
* bookkeeping needed for persistence and locking.
*/
typedef struct ttrie {
// Root node of the trie; created with ttnode_init() in ternarytrie_init()
TernaryTrieNode *root;
// Number of keys currently stored: incremented on every successful add and
// decremented on every successful remove
size_t size;
// Copy of the path handed to ternarytrie_populate(). While this is NULL
// (the state calloc leaves it in), additions are not written to any file.
char* file_path;
// Read-write lock: taken for reading by ternarytrie_search() and for writing
// by ternarytrie_add(), ternarytrie_add_random() and ternarytrie_remove()
pthread_rwlock_t lock;
} TernaryTrie;
/**
 * Allocate and initialize an empty TernaryTrie.
 *
 * The trie starts with size 0 and no backing file (both zeroed by calloc), a
 * fresh root node and an initialized read-write lock.
 *
 * @return pointer to the empty TernaryTrie, or NULL if the trie, its lock or
 * its root node could not be set up
 */
TernaryTrie *ternarytrie_init() {
  TernaryTrie *trie = calloc(1, sizeof *trie);
  if (trie == NULL) {
    return NULL;
  }

  if (pthread_rwlock_init(&trie->lock, NULL) != 0) {
    free(trie);
    return NULL;
  }

  // Every lookup dereferences the root, so a trie without one is unusable.
  // NOTE(review): assumes ttnode_init() reports failure by returning NULL.
  trie->root = ttnode_init();
  if (trie->root == NULL) {
    pthread_rwlock_destroy(&trie->lock);
    free(trie);
    return NULL;
  }

  return trie;
}
/**
 * De-allocate a TernaryTrie by freeing its entire underlying structure.
 *
 * Releases the node tree, the stored backing-file path, the read-write lock
 * and finally the trie itself. Passing NULL is a no-op. The caller must make
 * sure no other thread is still using the trie.
 *
 * @param trie trie to free
 */
void ternarytrie_free(TernaryTrie *trie) {
  if (trie == NULL) {
    return;
  }

  ttnode_free(trie->root);

  // file_path is the copy made by ternarytrie_populate(); it is still NULL if
  // the trie was never populated, which free() accepts.
  // NOTE(review): assumes my_strdup() allocates with malloc — confirm.
  free(trie->file_path);

  pthread_rwlock_destroy(&trie->lock);
  free(trie);
}
// Forward declaration: ternarytrie_populate() below inserts entries through
// this function, whose definition only appears further down in the file.
bool ternarytrie_add_internal(TernaryTrie *trie, const char *string, const char *payload);
/**
 * Load key/payload pairs from a file into the trie and remember that file as
 * the trie's backing store.
 *
 * Every line is expected to look like "<key> <payload>": the key is everything
 * before the first space, the payload is the rest of the line without its
 * trailing newline. Lines that contain no space are skipped. The path is
 * recorded even when the file cannot be opened, so later additions can still
 * be appended to it.
 *
 * Entries are inserted with ternarytrie_add_internal(); this function does not
 * take the trie's lock itself.
 *
 * @param trie trie to populate
 * @param file_path path of the file to read the entries from
 */
void ternarytrie_populate(TernaryTrie *trie, const char *file_path) {
  // Copy the new path before releasing the old one, so that calling this
  // function again neither leaks the previous copy nor breaks when file_path
  // points at the stored path.
  // NOTE(review): assumes my_strdup() allocates with malloc — confirm.
  char *path_copy = my_strdup(file_path);
  free(trie->file_path);
  trie->file_path = path_copy;

  FILE *fp = fopen(file_path, "r");
  // TODO properly handle this
  if (fp == NULL) {
    return;
  }

  // We read in lines of at most 8191 characters (sounds like enough). A longer
  // line is processed in several chunks, each treated as a line of its own.
  char buffer[8192];

  while (fgets(buffer, sizeof buffer, fp)) {
    // NOTE(review): echoes every loaded line to stdout; looks like leftover
    // debug output — confirm before removing.
    printf("%s", buffer);

    // Cut the line at its newline. strcspn stops at the terminator when there
    // is none (last line of the file), so this never leaves the buffer.
    buffer[strcspn(buffer, "\n")] = '\0';

    // Split the buffer into two strings, the key and the payload
    char *separator = strchr(buffer, ' ');
    if (separator == NULL) {
      // Malformed line without a key/payload separator
      continue;
    }
    *separator = '\0';

    ternarytrie_add_internal(trie, buffer, separator + 1);
  }

  fclose(fp);
}
/**
* Outcome of ternarytrie_search_node(). Both fields are NULL when the string
* was not found.
*/
typedef struct searchresult {
// Node from which `child` was reached; stays NULL when the match is the root
// node (empty string)
TernaryTrieNode *parent;
// Node that stores the string; NULL if the string is not in the trie
TernaryTrieNode *child;
} SearchResult;
/**
 * Find the node that stores the given string, together with the node it was
 * reached from.
 *
 * Does not take the trie's lock; callers are expected to hold it.
 *
 * @param trie trie to look in
 * @param string string to look up
 * @return result whose child is the matching node and whose parent is the node
 * the child was reached from; both are NULL if the string is not present. For
 * the empty string the child is the root and the parent stays NULL.
 */
SearchResult ternarytrie_search_node(TernaryTrie *trie, const char *string) {
  SearchResult result = {NULL, NULL};

  // The empty string is represented by the root node itself
  if (string[0] == DELIMITER) {
    if (trie->root->type == 1) {
      result.child = trie->root;
    }
    return result;
  }

  TernaryTrieNode **current = &(trie->root);
  TernaryTrieNode **next;
  size_t pos = 0;

  // Walk down one character at a time until the string is used up or a full
  // leaf (type 2) is reached
  for (;;) {
    next = ttnode_search(*current, string[pos], false);

    // No edge for this character: the string is not in the trie
    if (next == NULL || *next == NULL) {
      return result;
    }

    pos++;

    if (string[pos] == DELIMITER || (*next)->type == 2) {
      break;
    }

    current = next;
  }

  TernaryTrieNode *candidate = *next;
  bool matches;

  if (candidate->type == 2) {
    // A full leaf stores the rest of its key, which has to equal what is left
    // of the searched string (and something has to be left)
    matches = string[pos] != DELIMITER &&
              strcmp(string + pos, candidate->ptr.string) == 0;
  } else {
    // The whole string was consumed; it is only present if this node is
    // marked as the end of a key
    matches = candidate->type == 1;
  }

  if (matches) {
    result.child = candidate;
    result.parent = *current;
  }

  return result;
}
/**
 * Look up the payload stored for the given string.
 *
 * The lookup runs while holding the trie's read lock. The returned pointer is
 * the payload pointer kept in the node, not a copy.
 *
 * @param trie trie to look in
 * @param string string to look up
 * @return the payload stored for the string, or NULL if the string is not
 * present in the trie
 */
char * ternarytrie_search(TernaryTrie *trie, const char *string) {
  pthread_rwlock_rdlock(&trie->lock);

  SearchResult hit = ternarytrie_search_node(trie, string);
  char *payload = (hit.child != NULL) ? hit.child->payload : NULL;

  pthread_rwlock_unlock(&trie->lock);

  return payload;
}
/**
* Add the given string, with its payload, to the TernaryTrie.
*
* Only the in-memory trie is touched: nothing is written to the backing file
* and the trie's lock is not taken here. The payload is stored as a copy made
* with my_strdup().
*
* @param trie trie to add string to
* @param string string to add
* @param payload payload to associate with the string
* @return true if the string wasn't present in the trie and thus added, false
* otherwise
*/
bool ternarytrie_add_internal(TernaryTrie *trie, const char *string, const char *payload) {
// Edge case for empty string
if (string[0] == DELIMITER) {
// The empty string is stored in the root itself; type 0 means the root
// doesn't represent a key yet
if (trie->root->type == 0) {
trie->root->type = 1;
trie->root->payload = my_strdup(payload);
trie->size++;
return true;
}
return false;
}
size_t i = 0;
// Address of the pointer to the node we're currently in; going through the
// address lets us hook a new node into an empty slot further down
TernaryTrieNode **node_ptr = &(trie->root);
TernaryTrieNode **new_node_ptr;
do {
new_node_ptr = ttnode_search(*node_ptr, string[i], true);
// ttnode_search will only return NULL with create true if the node to look
// in represents a full leaf. Therefore, we split the node and restart the
// iteration.
if (new_node_ptr == NULL) {
// It's possible we've ended up in the full leaf node that represents this
// string
if (strcmp(string + i, (*node_ptr)->ptr.string) == 0) {
return false;
}
ttnode_split(*node_ptr);
// `continue` jumps to the loop condition; i hasn't advanced and string[i]
// isn't the delimiter, so the same character is retried on the split node
continue;
}
node_ptr = new_node_ptr;
// The search function has added the character to the node
i++;
// The next node in the string's path doesn't exist yet, so we add it to the
// trie
if (*node_ptr == NULL) {
TernaryTrieNode *new_node = ttnode_init();
// If there's a remaining part of the string, we add it to the leaf
if (string[i] != DELIMITER) {
ttnode_set_string(new_node, string + i);
} else {
// Nothing left of the string: the new node itself marks the end of the key
new_node->type = 1;
}
new_node->payload = my_strdup(payload);
*node_ptr = new_node;
trie->size++;
return true;
}
} while (string[i] != DELIMITER);
// If we've arrived here, we've traversed through the entire string and have
// arrived at a node that already exists.
// The existing node is a full leaf, so we split it and make it
// represent our new string.
if ((*node_ptr)->type == 2) {
ttnode_split(*node_ptr);
}
// The string is already in the trie
else if ((*node_ptr)->type == 1) {
return false;
}
// Reached for a node that didn't represent a key yet, or for a full leaf
// that was just split: mark it as the end of our string
(*node_ptr)->type = 1;
(*node_ptr)->payload = my_strdup(payload);
trie->size++;
return true;
}
/**
 * Add a key/payload pair to the trie and, if the trie has a backing file,
 * append the pair to that file first as a "<key> <payload>" line.
 *
 * Does not take the trie's lock; both callers in this file hold the write lock
 * while calling it.
 *
 * @param trie trie to add the string to
 * @param string string to add
 * @param payload payload to associate with the string
 * @return true if the string was added; false if it was already present or if
 * the backing file could not be opened or written
 */
bool ternarytrie_add_persistent(TernaryTrie *trie, const char *string, const char *payload) {
  if (trie->file_path != NULL) {
    // Easiest way to make sure we don't add duplicate entries
    // We use an internal function that doesn't require a read lock, as we're
    // already inside a write lock
    if (ternarytrie_search_node(trie, string).child != NULL) {
      return false;
    }

    FILE *fp = fopen(trie->file_path, "a");
    if (fp == NULL) {
      return false;
    }

    bool written = fprintf(fp, "%s %s\n", string, payload) >= 0;

    // Output is buffered, so a write error may only surface when the stream is
    // flushed on close
    if (fclose(fp) != 0) {
      written = false;
    }

    // Don't report success for an entry that wouldn't survive a reload
    if (!written) {
      return false;
    }
  }

  // This function *should* always return true. Otherwise, the function would've
  // exited because the string was found in the trie.
  return ternarytrie_add_internal(trie, string, payload);
}
/**
 * Add a key/payload pair to the trie while holding the trie's write lock.
 *
 * The actual work is delegated to ternarytrie_add_persistent().
 *
 * @param trie trie to add the string to
 * @param string string to add
 * @param payload payload to associate with the string
 * @return whatever ternarytrie_add_persistent() returns for this pair
 */
bool ternarytrie_add(TernaryTrie *trie, const char *string, const char *payload) {
  pthread_rwlock_wrlock(&trie->lock);

  const bool added = ternarytrie_add_persistent(trie, string, payload);

  pthread_rwlock_unlock(&trie->lock);

  return added;
}
/**
 * Store a payload under a freshly generated random key.
 *
 * The key consists of RANDOM_KEY_LENGTH characters drawn from charset with
 * rand(). Key generation and insertion both happen under the trie's write
 * lock, so the key cannot be claimed by another thread in between.
 *
 * @param trie trie to add the payload to
 * @param payload payload to store
 * @return the generated key, heap-allocated and owned by the caller (release
 * it with free()), or NULL if the key buffer could not be allocated or the
 * entry could not be added
 */
char* ternarytrie_add_random(TernaryTrie *trie, const char *payload) {
  // Allocate before taking the lock, so a failed allocation has nothing to
  // unlock
  char *key = malloc(RANDOM_KEY_LENGTH + 1);
  if (key == NULL) {
    return NULL;
  }
  key[RANDOM_KEY_LENGTH] = '\0';

  pthread_rwlock_wrlock(&trie->lock);

  // We naively generate new keys until we find a key that isn't in the trie
  // yet. With charset_len ** RANDOM_KEY_LENGTH sufficiently large, this isn't a
  // problem, because the chances of collisions are extremely small.
  do {
    for (int i = 0; i < RANDOM_KEY_LENGTH; i++) {
      key[i] = charset[rand() % charset_len];
    }
  } while (ternarytrie_search_node(trie, key).child != NULL);

  bool added = ternarytrie_add_persistent(trie, key, payload);

  pthread_rwlock_unlock(&trie->lock);

  if (!added) {
    free(key);
    return NULL;
  }

  return key;
}
/**
 * Remove the given string from a TernaryTrie.
 *
 * Runs under the trie's write lock. Only the in-memory trie is changed; this
 * function does not touch the backing file.
 *
 * @param trie trie to remove string from
 * @param string string to remove
 * @return true if the string was in the trie and thus removed, false otherwise
 */
bool ternarytrie_remove(TernaryTrie *trie, const char *string) {
  pthread_rwlock_wrlock(&trie->lock);

  SearchResult found = ternarytrie_search_node(trie, string);
  bool removed = (found.child != NULL);

  if (removed) {
    TernaryTrieNode *node = found.child;

    trie->size--;

    if (found.parent == NULL || (node->type != 2 && node->size != 0)) {
      // Either the root (empty string) or a node that still has children:
      // the node stays in place and just stops representing a key.
      // NOTE(review): the node's payload pointer is left untouched here.
      node->type = 0;
    } else {
      // The node can go entirely. Work out which character of the string
      // leads from the parent to it.
      size_t edge_index;

      if (node->type == 2) {
        // Full leaf: it holds the tail of the string, so the character sits
        // right in front of that tail
        edge_index = strlen(string) - strlen(node->ptr.string) - 1;
      } else {
        // Childless node at the end of the string: it's the last character
        edge_index = 0;
        while (string[edge_index + 1] != DELIMITER) {
          edge_index++;
        }
      }

      ttnode_remove(found.parent, string[edge_index]);
      ttnode_free(node);
    }
  }

  pthread_rwlock_unlock(&trie->lock);

  return removed;
}
/**
 * Return the current size of the given trie.
 *
 * The counter is read under the trie's read lock, because adds and removes
 * modify it while holding the write lock.
 *
 * @param trie trie to return size for
 * @return size of the trie, i.e. the number of strings stored in it
 */
size_t ternarytrie_size(TernaryTrie *trie) {
  pthread_rwlock_rdlock(&trie->lock);
  size_t size = trie->size;
  pthread_rwlock_unlock(&trie->lock);

  return size;
}