394 lines
9.1 KiB
C
394 lines
9.1 KiB
C
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <pthread.h>
|
|
|
|
#include "ternarytrie.h"
|
|
#include "ternarytrie_node.c"
|
|
|
|
typedef struct ttrie {
|
|
TernaryTrieNode *root;
|
|
size_t size;
|
|
char* file_path;
|
|
pthread_rwlock_t lock;
|
|
} TernaryTrie;
|
|
|
|
/**
|
|
* Allocate and initialize an empty TernaryTrie
|
|
*
|
|
* @return pointer to the empty TernaryTrie
|
|
*/
|
|
TernaryTrie *ternarytrie_init() {
|
|
TernaryTrie *trie = calloc(1, sizeof(TernaryTrie));
|
|
trie->root = ttnode_init();
|
|
pthread_rwlock_init(&trie->lock, NULL);
|
|
|
|
return trie;
|
|
}
|
|
|
|
/**
|
|
* De-allocate a TernaryTree by freeing its entire underlying structure.
|
|
*
|
|
* @param trie trie to free
|
|
*/
|
|
void ternarytrie_free(TernaryTrie *trie) {
|
|
ttnode_free(trie->root);
|
|
free(trie);
|
|
}
|
|
|
|
// Forward declaration: ternarytrie_populate() calls this before its definition.
bool ternarytrie_add_internal(TernaryTrie *trie, const char *string, const char *payload);
|
|
|
|
/**
 * Load key/payload pairs from a file into the trie and record the file as the
 * trie's backing file.
 *
 * Every line is expected to look like "<key> <payload>\n": the key is
 * everything before the first space, the payload everything after it up to
 * the end of the line. Lines that contain no space are skipped. Lines longer
 * than the read buffer are read in several pieces, each handled as its own
 * line.
 *
 * No lock is taken here.
 *
 * @param trie trie to populate
 * @param file_path path of the file to read; a copy is stored in the trie
 */
void ternarytrie_populate(TernaryTrie *trie, const char *file_path) {
    trie->file_path = my_strdup(file_path);

    FILE *fp = fopen(file_path, "r");

    // If the file can't be opened nothing is loaded; the path is still
    // recorded so later additions can be appended to it.
    if (fp == NULL) {
        return;
    }

    // We read in lines of at most 8192 characters (sounds like enough)
    char buffer[8192];

    while (fgets(buffer, sizeof buffer, fp)) {
        printf("%s", buffer);

        // The first space separates the key from the payload. A line without
        // one is malformed; skip it instead of scanning past the terminator.
        char *separator = strchr(buffer, ' ');

        if (separator == NULL) {
            continue;
        }

        *separator = '\0';

        // Cut the payload at the newline, or at the end of the buffer when
        // the line has none (last line of the file, or an over-long line).
        char *payload = separator + 1;
        payload[strcspn(payload, "\n")] = '\0';

        ternarytrie_add_internal(trie, buffer, payload);
    }

    fclose(fp);
}
|
|
|
|
typedef struct searchresult {
|
|
TernaryTrieNode *parent;
|
|
TernaryTrieNode *child;
|
|
} SearchResult;
|
|
|
|
/**
 * Locate the node that represents the given string, together with the node it
 * was reached from.
 *
 * This function takes no lock itself; the public wrappers in this file call
 * it while holding the trie's lock.
 *
 * @param trie trie to look in
 * @param string string to look up
 * @return a SearchResult whose child is the matching node (NULL if the string
 * is not in the trie) and whose parent is the node above it (NULL for the
 * root or when nothing was found)
 */
SearchResult ternarytrie_search_node(TernaryTrie *trie, const char *string) {
    SearchResult result = {NULL, NULL};

    // The empty string can only live in the root, which has no parent
    if (string[0] == DELIMITER) {
        if (trie->root->type == 1) {
            result.child = trie->root;
        }

        return result;
    }

    size_t pos = 0;
    TernaryTrieNode **current = &(trie->root);
    TernaryTrieNode **next;

    for (;;) {
        next = ttnode_search(*current, string[pos], false);

        // No edge for this character: the string isn't in the trie
        if (next == NULL || *next == NULL) {
            return result;
        }

        pos++;

        // Stop once the string is consumed or we hit a full leaf, which
        // holds the rest of its key as a plain string
        if (string[pos] == DELIMITER || (*next)->type == 2) {
            break;
        }

        current = next;
    }

    TernaryTrieNode *found = *next;
    bool matches;

    if (found->type == 2) {
        // A full leaf matches only if the unread remainder is non-empty and
        // equal to the suffix stored in the leaf
        matches = string[pos] != DELIMITER &&
                  strcmp(string + pos, found->ptr.string) == 0;
    } else {
        // The whole string was consumed; it is present only if this node is
        // marked as the end of a key
        matches = found->type == 1;
    }

    if (matches) {
        result.child = found;
        result.parent = *current;
    }

    return result;
}
|
|
|
|
/**
|
|
* Returns whether the given string is present in the trie.
|
|
*
|
|
* @param trie trie to look in
|
|
* @param string string to look up
|
|
* @return true if the string is present in the trie, false otherwise
|
|
*/
|
|
char * ternarytrie_search(TernaryTrie *trie, const char *string) {
|
|
pthread_rwlock_rdlock(&trie->lock);
|
|
|
|
SearchResult res = ternarytrie_search_node(trie, string);
|
|
|
|
char* return_value = NULL;
|
|
|
|
if (res.child != NULL) {
|
|
return_value = res.child->payload;
|
|
}
|
|
|
|
pthread_rwlock_unlock(&trie->lock);
|
|
|
|
return return_value;
|
|
}
|
|
|
|
/**
|
|
* Add the given string to the TernaryTrie.
|
|
*
|
|
* @param trie trie to add string to
|
|
* @param string string to add
|
|
* @return true if the string wasn't present in the trie and thus added, false
|
|
* otherwise
|
|
*/
|
|
bool ternarytrie_add_internal(TernaryTrie *trie, const char *string, const char *payload) {
|
|
// Edge case for empty string
|
|
if (string[0] == DELIMITER) {
|
|
if (trie->root->type == 0) {
|
|
trie->root->type = 1;
|
|
trie->root->payload = my_strdup(payload);
|
|
trie->size++;
|
|
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
size_t i = 0;
|
|
TernaryTrieNode **node_ptr = &(trie->root);
|
|
TernaryTrieNode **new_node_ptr;
|
|
|
|
do {
|
|
new_node_ptr = ttnode_search(*node_ptr, string[i], true);
|
|
|
|
// ttnode_search will only return NULL with create true if the node to look
|
|
// in represents a full leaf. Therefore, we split the node and restart the
|
|
// iteration.
|
|
if (new_node_ptr == NULL) {
|
|
// It's possible we've ended up in the full leaf node that represents this
|
|
// string
|
|
if (strcmp(string + i, (*node_ptr)->ptr.string) == 0) {
|
|
return false;
|
|
}
|
|
|
|
ttnode_split(*node_ptr);
|
|
continue;
|
|
}
|
|
|
|
node_ptr = new_node_ptr;
|
|
|
|
// The search function has added the character to the node
|
|
i++;
|
|
|
|
// The next node in the string's path doesn't exist yet, so we add it to the
|
|
// trie
|
|
if (*node_ptr == NULL) {
|
|
TernaryTrieNode *new_node = ttnode_init();
|
|
|
|
// If there's a remaining part of the string, we add it to the leaf
|
|
if (string[i] != DELIMITER) {
|
|
ttnode_set_string(new_node, string + i);
|
|
} else {
|
|
new_node->type = 1;
|
|
}
|
|
|
|
new_node->payload = my_strdup(payload);
|
|
|
|
*node_ptr = new_node;
|
|
|
|
trie->size++;
|
|
|
|
return true;
|
|
}
|
|
} while (string[i] != DELIMITER);
|
|
|
|
// If we've arrived here, we've traversed through the entire string and have
|
|
// arrived at a node that already exists.
|
|
|
|
// The existing node is a full leaf, so we split it and make it
|
|
// represent our new string.
|
|
if ((*node_ptr)->type == 2) {
|
|
ttnode_split(*node_ptr);
|
|
}
|
|
// The string is already in the trie
|
|
else if ((*node_ptr)->type == 1) {
|
|
return false;
|
|
}
|
|
|
|
(*node_ptr)->type = 1;
|
|
(*node_ptr)->payload = my_strdup(payload);
|
|
|
|
trie->size++;
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
 * Add a string/payload pair to the trie and, if the trie has a backing file,
 * append the pair to that file first as a "<string> <payload>" line.
 *
 * No lock is taken here; the callers in this file hold the write lock.
 *
 * @param trie trie to add string to
 * @param string string to add
 * @param payload payload to associate with the string
 * @return true if the pair was added; false if the string was already
 * present, or if the backing file could not be opened or written
 */
bool ternarytrie_add_persistent(TernaryTrie *trie, const char *string, const char *payload) {
    if (trie->file_path != NULL) {
        // Easiest way to make sure we don't append duplicate entries. The
        // node-level search is used because it takes no lock of its own.
        if (ternarytrie_search_node(trie, string).child != NULL) {
            return false;
        }

        FILE *fp = fopen(trie->file_path, "a");

        if (fp == NULL) {
            return false;
        }

        bool written = fprintf(fp, "%s %s\n", string, payload) >= 0;

        // fclose flushes buffered output, so a write error may only surface
        // here
        if (fclose(fp) != 0) {
            written = false;
        }

        // Don't report an entry as added when it never reached the file
        if (!written) {
            return false;
        }
    }

    // This call *should* always return true when a backing file is set,
    // because the duplicate check above has already passed.
    return ternarytrie_add_internal(trie, string, payload);
}
|
|
|
|
/**
 * Add a string/payload pair to the trie while holding the trie's write lock.
 *
 * The actual work is delegated to ternarytrie_add_persistent().
 *
 * @param trie trie to add string to
 * @param string string to add
 * @param payload payload to associate with the string
 * @return the result of ternarytrie_add_persistent()
 */
bool ternarytrie_add(TernaryTrie *trie, const char *string, const char *payload) {
    pthread_rwlock_wrlock(&trie->lock);
    const bool added = ternarytrie_add_persistent(trie, string, payload);
    pthread_rwlock_unlock(&trie->lock);

    return added;
}
|
|
|
|
/**
 * Store the payload under a freshly generated random key of
 * RANDOM_KEY_LENGTH characters drawn from charset.
 *
 * The trie's write lock is held while the key is generated and added.
 *
 * @param trie trie to add the payload to
 * @param payload payload to store
 * @return the generated key, which the caller must free(), or NULL if the key
 * buffer could not be allocated or the entry could not be added
 */
char *ternarytrie_add_random(TernaryTrie *trie, const char *payload) {
    // Allocate before taking the lock so a failed allocation has nothing to
    // unwind
    char *key = malloc(RANDOM_KEY_LENGTH + 1);

    if (key == NULL) {
        return NULL;
    }

    key[RANDOM_KEY_LENGTH] = '\0';

    pthread_rwlock_wrlock(&trie->lock);

    // We naively generate new keys until we find one that isn't in the trie
    // yet. With charset_len ** RANDOM_KEY_LENGTH sufficiently large, this
    // isn't a problem, because the chances of collisions are extremely small.
    do {
        for (int i = 0; i < RANDOM_KEY_LENGTH; i++) {
            key[i] = charset[rand() % charset_len];
        }
    } while (ternarytrie_search_node(trie, key).child != NULL);

    bool added = ternarytrie_add_persistent(trie, key, payload);

    pthread_rwlock_unlock(&trie->lock);

    if (!added) {
        free(key);

        return NULL;
    }

    return key;
}
|
|
|
|
|
|
/**
|
|
* Remove the given string from a TernaryTrie.
|
|
*
|
|
* @param trie trie to remove string from
|
|
* @param string string to remove
|
|
* @return true if the string was in the trie and thus removed, false otherwise
|
|
*/
|
|
bool ternarytrie_remove(TernaryTrie *trie, const char *string) {
|
|
pthread_rwlock_wrlock(&trie->lock);
|
|
|
|
bool return_value = false;
|
|
|
|
SearchResult res = ternarytrie_search_node(trie, string);
|
|
|
|
if (res.child == NULL) {
|
|
goto end;
|
|
}
|
|
|
|
trie->size--;
|
|
return_value = true;
|
|
|
|
if (res.parent != NULL) {
|
|
// We're removing a full leaf, so we calculate the offset of the character
|
|
// to remove from the parent
|
|
if (res.child->type == 2) {
|
|
size_t str_len = strlen(string);
|
|
size_t suffix_len = strlen(res.child->ptr.string);
|
|
|
|
ttnode_remove(res.parent, string[str_len - suffix_len - 1]);
|
|
}
|
|
// In the other case, the character to remove from the parent is the last
|
|
// character of the string
|
|
else if (res.child->size == 0) {
|
|
size_t i = 0;
|
|
|
|
while (string[i + 1] != DELIMITER) {
|
|
i++;
|
|
}
|
|
|
|
ttnode_remove(res.parent, string[i]);
|
|
} else {
|
|
res.child->type = 0;
|
|
|
|
goto end;
|
|
}
|
|
|
|
ttnode_free(res.child);
|
|
}
|
|
// We're in the root here
|
|
else {
|
|
res.child->type = 0;
|
|
}
|
|
|
|
end:
|
|
pthread_rwlock_unlock(&trie->lock);
|
|
|
|
return return_value;
|
|
}
|
|
|
|
/**
|
|
* Return the current size of the given trie.
|
|
*
|
|
* @param trie trie to return size for
|
|
* @return size of the trie
|
|
*/
|
|
size_t ternarytrie_size(TernaryTrie *trie) { return trie->size; }
|