lander/tries/src/ternarytrie.c

326 lines
7.5 KiB
C
Raw Normal View History

2022-11-15 16:21:27 +01:00
#include "ternarytrie.h"
#include "ternarytrie_node.c"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
2022-11-15 16:21:27 +01:00
/**
 * A ternary trie together with its bookkeeping data.
 */
typedef struct ttrie {
  // Root node of the trie; created by ttnode_init in ternarytrie_init
  TernaryTrieNode *root;
  // Number of strings currently stored in the trie
  size_t size;
  // Copy of the path handed to ternarytrie_populate; ternarytrie_add appends
  // new entries to this file whenever the field is not NULL
  char *file_path;
} TernaryTrie;
/**
* Allocate and initialize an empty TernaryTrie
*
* @return pointer to the empty TernaryTrie
*/
2022-11-15 17:05:14 +01:00
TernaryTrie *ternarytrie_init() {
2022-11-15 16:21:27 +01:00
TernaryTrie *node = calloc(1, sizeof(TernaryTrie));
node->root = ttnode_init();
return node;
}
/**
* De-allocate a TernaryTree by freeing its entire underlying structure.
*
* @param trie trie to free
*/
2022-11-15 17:05:14 +01:00
void ternarytrie_free(TernaryTrie *trie) {
2022-11-15 16:21:27 +01:00
ttnode_free(trie->root);
free(trie);
}
bool ternarytrie_add_internal(TernaryTrie *trie, const char *string, const char *payload);

/**
 * Remember the given file as the trie's backing file and load its entries.
 *
 * Each line is expected to look like "<key> <payload>\n": everything before
 * the first space is the key, everything after it (up to the newline) is the
 * payload. Lines without a space are skipped. Lines longer than the read
 * buffer are delivered by fgets in several pieces, each of which is treated
 * as a line of its own.
 *
 * The path is stored even when the file cannot be opened; in that case no
 * entries are loaded.
 *
 * @param trie trie to add the entries to
 * @param file_path path of the file to read
 */
void ternarytrie_populate(TernaryTrie *trie, const char *file_path) {
  // Duplicate before releasing the old value so the call stays valid even if
  // the caller passes trie->file_path itself.
  // NOTE(review): assumes my_strdup memory is released with free() - confirm
  char *path_copy = my_strdup(file_path);
  free(trie->file_path);
  trie->file_path = path_copy;

  FILE *fp = fopen(file_path, "r");

  // TODO properly handle this
  if (fp == NULL) {
    return;
  }

  // We read in lines of at most 8192 characters (sounds like enough)
  char buffer[8192];

  while (fgets(buffer, sizeof buffer, fp)) {
    // NOTE(review): echoing every line to stdout looks like leftover debug
    // output - confirm before removing
    printf("%s", buffer);

    // Split the buffer into two strings, the key and the payload. strchr
    // stops at the terminator, so a line without a space can't run past the
    // end of the data fgets wrote.
    char *space = strchr(buffer, ' ');

    if (space == NULL) {
      continue;
    }

    *space = '\0';
    char *payload = space + 1;

    // Cut the payload at the newline if there is one; the last line of a file
    // or an over-long line may not end in one
    payload[strcspn(payload, "\n")] = '\0';

    ternarytrie_add_internal(trie, buffer, payload);
  }

  fclose(fp);
}
2022-11-15 16:21:27 +01:00
/**
 * Outcome of a node lookup performed by ternarytrie_search_node.
 */
typedef struct searchresult {
  // Node in which the matching child was looked up; stays NULL when nothing
  // was found or when the match is the root node
  TernaryTrieNode *parent;
  // Node representing the searched string, or NULL if it isn't in the trie
  TernaryTrieNode *child;
} SearchResult;
/**
 * Locate the node that represents the given string, together with the node
 * it was looked up in.
 *
 * @param trie trie to look in
 * @param string string to look up
 * @return a SearchResult whose child is the matching node and whose parent is
 * the node the child was found in; both are NULL when the string is absent,
 * and parent is NULL when the match is the root (empty string)
 */
SearchResult ternarytrie_search_node(TernaryTrie *trie, const char *string) {
  SearchResult result = {NULL, NULL};

  // The empty string is represented by the root itself
  if (string[0] == DELIMITER) {
    if (trie->root->type == 1) {
      result.child = trie->root;
    }

    return result;
  }

  TernaryTrieNode **parent_slot = &(trie->root);
  TernaryTrieNode **child_slot;
  size_t pos = 0;

  for (;;) {
    child_slot = ttnode_search(*parent_slot, string[pos], false);

    // No edge for this character means the string isn't stored
    if (child_slot == NULL || *child_slot == NULL) {
      return result;
    }

    pos++;

    // Stop once the input is exhausted or we've hit a full leaf, which holds
    // the rest of its string itself
    if (string[pos] == DELIMITER || (*child_slot)->type == 2) {
      break;
    }

    parent_slot = child_slot;
  }

  TernaryTrieNode *found = *child_slot;
  bool matches;

  if (found->type == 2) {
    // A full leaf only matches if the remaining input is non-empty and equal
    // to the suffix stored in the leaf
    matches = string[pos] != DELIMITER &&
              strcmp(string + pos, found->ptr.string) == 0;
  } else {
    // The whole input was consumed; the node only counts if it is marked as
    // the end of a string
    matches = found->type == 1;
  }

  if (matches) {
    result.child = found;
    result.parent = *parent_slot;
  }

  return result;
}
/**
 * Look up the payload stored for the given string.
 *
 * @param trie trie to look in
 * @param string string to look up
 * @return the payload pointer held by the matching node (the node's own
 * field, not a copy), or NULL if the string is not present in the trie
 */
char *ternarytrie_search(TernaryTrie *trie, const char *string) {
  TernaryTrieNode *match = ternarytrie_search_node(trie, string).child;

  return match != NULL ? match->payload : NULL;
}
/**
 * Add the given string and its payload to the in-memory TernaryTrie.
 *
 * @param trie trie to add string to
 * @param string string to add
 * @param payload payload to associate with the string; a my_strdup copy of it
 * is stored on the node
 * @return true if the string wasn't present in the trie and thus added, false
 * otherwise
 */
bool ternarytrie_add_internal(TernaryTrie *trie, const char *string, const char *payload) {
  // The empty string is stored on the root node itself
  if (string[0] == DELIMITER) {
    if (trie->root->type != 0) {
      return false;
    }

    trie->root->type = 1;
    trie->root->payload = my_strdup(payload);
    trie->size++;

    return true;
  }

  TernaryTrieNode **cur = &(trie->root);
  size_t pos = 0;

  // The first character is known not to be the delimiter, so the body always
  // runs at least once
  while (string[pos] != DELIMITER) {
    TernaryTrieNode **next = ttnode_search(*cur, string[pos], true);

    // With create set, ttnode_search only returns NULL when the node we're
    // looking in is a full leaf
    if (next == NULL) {
      // That leaf might already represent exactly this string
      if (strcmp(string + pos, (*cur)->ptr.string) == 0) {
        return false;
      }

      // Otherwise split the leaf and retry the same character
      ttnode_split(*cur);
      continue;
    }

    // The character now has a slot in the node; descend into it
    cur = next;
    pos++;

    // Empty slot: the rest of the path doesn't exist yet, so a single new
    // node finishes the insertion
    if (*cur == NULL) {
      TernaryTrieNode *leaf = ttnode_init();

      if (string[pos] == DELIMITER) {
        leaf->type = 1;
      } else {
        // The remaining characters are stored in the leaf itself
        ttnode_set_string(leaf, string + pos);
      }

      leaf->payload = my_strdup(payload);
      *cur = leaf;
      trie->size++;

      return true;
    }
  }

  // The whole string has been consumed and we ended on an existing node

  // The node already marks the end of this string
  if ((*cur)->type == 1) {
    return false;
  }

  // A full leaf is split first so the node itself can represent our string
  if ((*cur)->type == 2) {
    ttnode_split(*cur);
  }

  (*cur)->type = 1;
  (*cur)->payload = my_strdup(payload);
  trie->size++;

  return true;
}
/**
 * Add the given string and payload to the trie and, if the trie has a backing
 * file, append the entry to that file first.
 *
 * When a backing file is set, the entry is written as "<string> <payload>\n".
 * If the file can't be opened or the write can't be completed, nothing is
 * added to the trie.
 *
 * @param trie trie to add string to
 * @param string string to add
 * @param payload payload to associate with the string
 * @return true if the string wasn't present and has been added, false if it
 * was already present or could not be persisted
 */
bool ternarytrie_add(TernaryTrie *trie, const char *string, const char *payload) {
  if (trie->file_path != NULL) {
    // Make sure we don't append duplicate entries to the file. Checking for
    // the node itself keeps this independent of the stored payload value.
    if (ternarytrie_search_node(trie, string).child != NULL) {
      return false;
    }

    FILE *fp = fopen(trie->file_path, "a");

    if (fp == NULL) {
      return false;
    }

    bool written = fprintf(fp, "%s %s\n", string, payload) >= 0;

    // fclose flushes the buffered output, so a failure here means the entry
    // may not have reached the file either
    if (fclose(fp) != 0) {
      written = false;
    }

    if (!written) {
      return false;
    }
  }

  return ternarytrie_add_internal(trie, string, payload);
}
2022-11-15 16:21:27 +01:00
/**
 * Remove the given string from a TernaryTrie.
 *
 * Nodes without children are detached from their parent and freed. A node
 * that has to stay in the trie (the root, or a node that still has children)
 * is only unmarked, and its payload is released.
 *
 * @param trie trie to remove string from
 * @param string string to remove
 * @return true if the string was in the trie and thus removed, false otherwise
 */
bool ternarytrie_remove(TernaryTrie *trie, const char *string) {
  SearchResult res = ternarytrie_search_node(trie, string);

  if (res.child == NULL) {
    return false;
  }

  trie->size--;

  if (res.parent != NULL) {
    // We're removing a full leaf, so we calculate the offset of the character
    // to remove from the parent: it's the one right before the stored suffix
    if (res.child->type == 2) {
      size_t str_len = strlen(string);
      size_t suffix_len = strlen(res.child->ptr.string);

      ttnode_remove(res.parent, string[str_len - suffix_len - 1]);
      ttnode_free(res.child);

      return true;
    }

    // A childless node is reached through the last character of the string
    if (res.child->size == 0) {
      size_t i = 0;

      while (string[i + 1] != DELIMITER) {
        i++;
      }

      ttnode_remove(res.parent, string[i]);
      ttnode_free(res.child);

      return true;
    }
  }

  // The node stays in the trie (it's the root or still has children), so it
  // just stops representing a string. Its payload is no longer reachable
  // through a search and would be overwritten by a later add, so release it.
  // NOTE(review): assumes the my_strdup copy is released with free() - confirm
  res.child->type = 0;
  free(res.child->payload);
  res.child->payload = NULL;

  return true;
}
/**
* Return the current size of the given trie.
*
* @param trie trie to return size for
* @return size of the trie
*/
2022-11-15 17:05:14 +01:00
size_t ternarytrie_size(TernaryTrie *trie) { return trie->size; }